From c0802cb67b8bd245d872ad31683d092a8f1a3f95 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 7 Apr 2026 09:29:32 -0700 Subject: [PATCH 01/19] =?UTF-8?q?feat(optimizer):=20add=20analyzer=20app?= =?UTF-8?q?=20=E2=80=94=20continuous=20table=20operation=20scheduling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces apps/optimizer-analyzer, a Spring Boot CommandLineRunner that evaluates every table in table_stats against pluggable OperationAnalyzer strategies. The first strategy, OrphanFilesDeletionAnalyzer, schedules OFD operations with 24h success / 1h failure retry cadence, a 6h SCHEDULED timeout, and a 5-strike circuit breaker. Key design choices: - Bulk-loads operations and history into maps (one query per type), then iterates the stats list — O(types) queries, not O(tables). - Uses the existing generic find() repository methods with null params. - Pure unit tests with Mockito — no Spring context needed. Co-Authored-By: Claude Opus 4.6 --- apps/optimizer-analyzer/build.gradle | 20 ++ .../analyzer/AnalyzerApplication.java | 25 ++ .../openhouse/analyzer/AnalyzerRunner.java | 185 ++++++++++++ .../openhouse/analyzer/CadencePolicy.java | 74 +++++ .../openhouse/analyzer/OperationAnalyzer.java | 43 +++ .../analyzer/OrphanFilesDeletionAnalyzer.java | 55 ++++ .../analyzer/config/AnalyzerConfig.java | 7 + .../analyzer/model/TableOperationRecord.java | 23 ++ .../analyzer/model/TableSummary.java | 26 ++ .../src/main/resources/application.properties | 9 + .../analyzer/AnalyzerRunnerTest.java | 270 ++++++++++++++++++ .../OrphanFilesDeletionAnalyzerTest.java | 242 ++++++++++++++++ settings.gradle | 1 + 13 files changed, 980 insertions(+) create mode 100644 apps/optimizer-analyzer/build.gradle create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java create mode 100644 apps/optimizer-analyzer/src/main/resources/application.properties create mode 100644 apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java create mode 100644 apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java diff --git a/apps/optimizer-analyzer/build.gradle b/apps/optimizer-analyzer/build.gradle new file mode 100644 index 000000000..af4def95a --- /dev/null +++ b/apps/optimizer-analyzer/build.gradle @@ -0,0 +1,20 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation project(':apps:optimizer') + implementation 'org.springframework.boot:spring-boot-starter:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-aop:2.7.8' + runtimeOnly 'mysql:mysql-connector-java:8.0.33' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testImplementation 'com.squareup.okhttp3:mockwebserver:4.10.0' + testRuntimeOnly 'com.h2database:h2' +} + +test { + useJUnitPlatform() +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java new file mode 100644 index 000000000..99ba56047 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.analyzer; + +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.domain.EntityScan; +import org.springframework.context.annotation.Bean; +import org.springframework.data.jpa.repository.config.EnableJpaRepositories; + +/** Entry point for the Optimizer Analyzer application. */ +@SpringBootApplication +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") +public class AnalyzerApplication { + + public static void main(String[] args) { + SpringApplication.run(AnalyzerApplication.class, args); + } + + /** Delegates to {@link AnalyzerRunner#analyze()} once per process invocation. */ + @Bean + public CommandLineRunner run(AnalyzerRunner runner) { + return args -> runner.analyze(); + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java new file mode 100644 index 000000000..5ad568d49 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -0,0 +1,185 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.data.domain.Pageable; +import org.springframework.stereotype.Component; + +/** + * Core analysis loop. Loads all {@code table_stats} rows and evaluates each table against every + * registered {@link OperationAnalyzer} in a single pass. + * + *

The two sides of the join — current operations and circuit-breaker history — are loaded into + * memory once per run before the table loop. Both are naturally bounded (only tables with active or + * recently failed operations have rows), so holding them in maps is safe at any table scale. + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class AnalyzerRunner { + + private final List analyzers; + private final TableStatsRepository statsRepo; + private final TableOperationsRepository operationsRepo; + private final TableOperationHistoryRepository historyRepo; + + /** Run the full analysis loop once. */ + public void analyze() { + // Pre-load the small sides of the joins — one query per analyzer type. + Map> opsByType = + analyzers.stream() + .collect( + Collectors.toMap( + OperationAnalyzer::getOperationType, a -> loadOpsMap(a.getOperationType()))); + + Map>> historyByType = + analyzers.stream() + .collect( + Collectors.toMap( + OperationAnalyzer::getOperationType, + a -> loadHistoryMap(a.getOperationType()))); + + List tableList = + statsRepo.find(null, null, null).stream() + .filter(row -> row.getTableUuid() != null) + .collect(Collectors.toList()); + log.info("Found {} tables in optimizer table_stats", tableList.size()); + + tableList.forEach( + row -> { + TableSummary table = toSummary(row); + analyzers.forEach( + analyzer -> { + String type = analyzer.getOperationType(); + Optional currentOp = + Optional.ofNullable(opsByType.get(type).get(row.getTableUuid())); + List history = + historyByType + .get(type) + .getOrDefault(row.getTableUuid(), Collections.emptyList()); + + Optional latestHistory = history.stream().findFirst(); + + if (analyzer.isEnabled(table) + && analyzer.shouldSchedule(table, currentOp, latestHistory) + && !isCircuitBroken(analyzer, row.getTableUuid(), history)) { + operationsRepo.save(buildOperation(row, type)); + log.info( + "Created PENDING {} operation for table {}.{}", + type, + row.getDatabaseId(), + row.getTableName()); + } + }); + }); + + log.info("Analysis complete"); + } + + /** + * Loads the most recent operation record per table for the given type. Deduplicates by keeping + * the newer row when a table has more than one active record. + */ + private Map loadOpsMap(String operationType) { + Map map = + operationsRepo.find(operationType, null, null, null, null).stream() + .filter(e -> e.getTableUuid() != null) + .collect( + Collectors.toMap( + TableOperationRow::getTableUuid, + AnalyzerRunner::toRecord, + (a, b) -> mostRecent(a, b))); + log.info("Analyzer {} found {} tables with operation history", operationType, map.size()); + return map; + } + + /** + * Loads all history rows for the given type and groups them by {@code tableUuid}, newest first. + * Called once per analyzer type to eliminate per-table N+1 queries in the circuit breaker check. + */ + private Map> loadHistoryMap(String operationType) { + return historyRepo.find(operationType, null, null, null, Pageable.unpaged()).stream() + .collect(Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)); + } + + private TableOperationRow buildOperation(TableStatsRow row, String operationType) { + return TableOperationRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseId()) + .tableName(row.getTableName()) + .operationType(operationType) + .status("PENDING") + .createdAt(Instant.now()) + .version(0L) + .build(); + } + + private TableSummary toSummary(TableStatsRow e) { + return TableSummary.builder() + .tableUuid(e.getTableUuid()) + .databaseId(e.getDatabaseId()) + .tableId(e.getTableName()) + .tableProperties( + e.getTableProperties() != null ? e.getTableProperties() : Collections.emptyMap()) + .stats(e.getStats()) + .build(); + } + + /** + * Returns {@code true} if the circuit breaker has tripped. Uses the pre-loaded history list + * instead of querying the DB per table. + */ + private boolean isCircuitBroken( + OperationAnalyzer analyzer, String tableUuid, List history) { + int threshold = analyzer.getCircuitBreakerThreshold(); + if (threshold <= 0 || history.size() < threshold) { + return false; + } + boolean allFailed = + history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); + if (allFailed) { + log.warn( + "Circuit breaker tripped for table {} operation {}: last {} attempts all FAILED", + tableUuid, + analyzer.getOperationType(), + threshold); + } + return allFailed; + } + + private static TableOperationRecord mostRecent(TableOperationRecord a, TableOperationRecord b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } + + private static TableOperationRecord toRecord(TableOperationRow e) { + TableOperationRecord r = new TableOperationRecord(); + r.setId(e.getId()); + r.setTableUuid(e.getTableUuid()); + r.setDatabaseName(e.getDatabaseName()); + r.setTableName(e.getTableName()); + r.setOperationType(e.getOperationType()); + r.setStatus(e.getStatus()); + r.setCreatedAt(e.getCreatedAt()); + r.setScheduledAt(e.getScheduledAt()); + return r; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java new file mode 100644 index 000000000..36d9ff841 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -0,0 +1,74 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; +import lombok.RequiredArgsConstructor; + +/** + * Encapsulates the time-based scheduling logic shared across operation types. An analyzer delegates + * to {@link CadencePolicy} to decide whether to re-issue a recommendation for a table that already + * has an active operation record and/or history. + * + *

The SCHEDULED timeout is a key safety mechanism: if a Spark job crashes without reporting + * back, the SCHEDULED row would otherwise block the table forever. When the row has been SCHEDULED + * (or SCHEDULING) longer than {@code scheduledTimeout}, the Analyzer treats it as stale and returns + * {@code true}, causing a new PENDING row to be inserted. + */ +@RequiredArgsConstructor +public class CadencePolicy { + + private final Duration successRetryInterval; + private final Duration failureRetryInterval; + private final Duration scheduledTimeout; + + /** + * Returns {@code true} if a new or refreshed operation record should be upserted. + * + * @param currentOp the existing active operation record, or empty if none exists + * @param latestHistory the most recent history entry for this (table, type), or empty + */ + public boolean shouldSchedule( + Optional currentOp, Optional latestHistory) { + if (currentOp.isEmpty()) { + return decideFromHistory(latestHistory); + } + TableOperationRecord op = currentOp.get(); + switch (op.getStatus()) { + case "PENDING": + case "SCHEDULING": + return false; + case "SCHEDULED": + if (latestHistory.isEmpty()) { + return pastInterval(op.getScheduledAt(), scheduledTimeout); + } + return decideFromHistoryEntry(latestHistory.get()); + default: + return true; + } + } + + private boolean decideFromHistory(Optional latestHistory) { + if (latestHistory.isEmpty()) { + return true; + } + return decideFromHistoryEntry(latestHistory.get()); + } + + private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { + switch (entry.getStatus()) { + case "SUCCESS": + return pastInterval(entry.getSubmittedAt(), successRetryInterval); + case "FAILED": + return pastInterval(entry.getSubmittedAt(), failureRetryInterval); + default: + return true; + } + } + + private boolean pastInterval(Instant timestamp, Duration interval) { + return timestamp == null || Duration.between(timestamp, Instant.now()).compareTo(interval) > 0; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java new file mode 100644 index 000000000..425fbdbfb --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.util.Optional; + +/** + * Strategy interface for a single operation type. Each implementation decides whether a given table + * needs an operation recommendation upserted in the Optimizer Service. + */ +public interface OperationAnalyzer { + + /** The operation type this analyzer handles (e.g., {@code "ORPHAN_FILES_DELETION"}). */ + String getOperationType(); + + /** + * Returns {@code true} if this operation is opted-in for the given table. Tables that return + * {@code false} are skipped entirely — no upsert is issued. + */ + boolean isEnabled(TableSummary table); + + /** + * Returns {@code true} if a new or refreshed operation record should be upserted. + * + * @param table the table entry + * @param currentOp the existing active operation record, or empty if none exists + * @param latestHistory the most recent history entry for this (table, type), or empty + */ + boolean shouldSchedule( + TableSummary table, + Optional currentOp, + Optional latestHistory); + + /** + * Maximum number of consecutive FAILED history entries before the circuit breaker trips and + * scheduling is suppressed for this (table, operation_type). Override per operation type. Returns + * 0 to disable the circuit breaker. + */ + default int getCircuitBreakerThreshold() { + return 5; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java new file mode 100644 index 000000000..016057aa4 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Duration; +import java.util.Optional; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +/** Analyzer for the {@code ORPHAN_FILES_DELETION} operation type. */ +@Component +public class OrphanFilesDeletionAnalyzer implements OperationAnalyzer { + + static final String OPERATION_TYPE = "ORPHAN_FILES_DELETION"; + static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled"; + + private final CadencePolicy cadencePolicy; + + @Autowired + public OrphanFilesDeletionAnalyzer( + @Value("${ofd.success-retry-hours:24}") long successRetryHours, + @Value("${ofd.failure-retry-hours:1}") long failureRetryHours, + @Value("${ofd.scheduled-timeout-hours:6}") long scheduledTimeoutHours) { + this.cadencePolicy = + new CadencePolicy( + Duration.ofHours(successRetryHours), + Duration.ofHours(failureRetryHours), + Duration.ofHours(scheduledTimeoutHours)); + } + + /** Package-private for tests that supply a pre-built {@link CadencePolicy}. */ + OrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) { + this.cadencePolicy = cadencePolicy; + } + + @Override + public String getOperationType() { + return OPERATION_TYPE; + } + + @Override + public boolean isEnabled(TableSummary table) { + return "true".equals(table.getTableProperties().get(OFD_ENABLED_PROPERTY)); + } + + @Override + public boolean shouldSchedule( + TableSummary table, + Optional currentOp, + Optional latestHistory) { + return cadencePolicy.shouldSchedule(currentOp, latestHistory); + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java new file mode 100644 index 000000000..30ad9f55b --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java @@ -0,0 +1,7 @@ +package com.linkedin.openhouse.analyzer.config; + +import org.springframework.context.annotation.Configuration; + +/** Spring configuration for the Analyzer. */ +@Configuration +public class AnalyzerConfig {} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java new file mode 100644 index 000000000..51bc4d803 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.analyzer.model; + +import java.time.Instant; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Lightweight representation of an active table operation record. Mirrors the fields in {@link + * com.linkedin.openhouse.optimizer.entity.TableOperationRow} that the Analyzer needs. + */ +@Data +@NoArgsConstructor +public class TableOperationRecord { + + private String id; + private String tableUuid; + private String databaseName; + private String tableName; + private String operationType; + private String status; + private Instant createdAt; + private Instant scheduledAt; +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java new file mode 100644 index 000000000..fbe166fff --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.analyzer.model; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Internal representation of a table, decoupled from any external API response model. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableSummary { + + private String tableUuid; + private String databaseId; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + /** Commit stats from the optimizer {@code table_stats} table. Null if no stats recorded yet. */ + private TableStats stats; +} diff --git a/apps/optimizer-analyzer/src/main/resources/application.properties b/apps/optimizer-analyzer/src/main/resources/application.properties new file mode 100644 index 000000000..990740f1d --- /dev/null +++ b/apps/optimizer-analyzer/src/main/resources/application.properties @@ -0,0 +1,9 @@ +spring.application.name=openhouse-optimizer-analyzer +spring.main.web-application-type=none +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:h2:mem:analyzerdb;DB_CLOSE_DELAY=-1;MODE=MySQL} +spring.datasource.username=${OPTIMIZER_DB_USER:sa} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} +spring.jpa.hibernate.ddl-auto=none +ofd.success-retry-hours=24 +ofd.failure-retry-hours=1 +ofd.scheduled-timeout-hours=6 diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java new file mode 100644 index 000000000..69de877a2 --- /dev/null +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -0,0 +1,270 @@ +package com.linkedin.openhouse.analyzer; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.data.domain.Pageable; + +@ExtendWith(MockitoExtension.class) +class AnalyzerRunnerTest { + + @Mock private TableStatsRepository statsRepo; + @Mock private TableOperationsRepository operationsRepo; + @Mock private TableOperationHistoryRepository historyRepo; + @Mock private OperationAnalyzer analyzer; + + private AnalyzerRunner runner; + + @BeforeEach + void setUp() { + runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); + } + + @Test + void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) + .thenReturn(true); + + runner.analyze(); + + ArgumentCaptor captor = ArgumentCaptor.forClass(TableOperationRow.class); + verify(operationsRepo).save(captor.capture()); + TableOperationRow saved = captor.getValue(); + assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); + assertThat(saved.getDatabaseName()).isEqualTo("db1"); + assertThat(saved.getTableName()).isEqualTo("tbl1"); + assertThat(saved.getOperationType()).isEqualTo("ORPHAN_FILES_DELETION"); + assertThat(saved.getStatus()).isEqualTo("PENDING"); + assertThat(saved.getId()).isNotNull(); + } + + @Test + void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + TableOperationRow existingEntity = new TableOperationRow(); + existingEntity.setId("existing-op-id"); + existingEntity.setStatus("PENDING"); + existingEntity.setTableUuid("uuid-1"); + existingEntity.setOperationType("ORPHAN_FILES_DELETION"); + existingEntity.setCreatedAt(Instant.now()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(List.of(existingEntity)); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + + TableOperationRecord existingRecord = new TableOperationRecord(); + existingRecord.setId("existing-op-id"); + existingRecord.setStatus("PENDING"); + existingRecord.setTableUuid("uuid-1"); + existingRecord.setOperationType("ORPHAN_FILES_DELETION"); + existingRecord.setCreatedAt(existingEntity.getCreatedAt()); + when(analyzer.shouldSchedule(expectedTable, Optional.of(existingRecord), Optional.empty())) + .thenReturn(false); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenNotEnabled() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + + TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(false); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenShouldScheduleReturnsFalse() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + + TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + + TableOperationRow scheduled = new TableOperationRow(); + scheduled.setId("op-id"); + scheduled.setStatus("SCHEDULED"); + scheduled.setTableUuid("uuid-1"); + scheduled.setOperationType("ORPHAN_FILES_DELETION"); + scheduled.setCreatedAt(Instant.now()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(List.of(scheduled)); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + + TableOperationRecord scheduledRecord = new TableOperationRecord(); + scheduledRecord.setId("op-id"); + scheduledRecord.setStatus("SCHEDULED"); + scheduledRecord.setTableUuid("uuid-1"); + scheduledRecord.setOperationType("ORPHAN_FILES_DELETION"); + scheduledRecord.setCreatedAt(scheduled.getCreatedAt()); + when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledRecord), Optional.empty())) + .thenReturn(false); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenTableUuidIsNull() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid(null); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find(anyString(), any(), any(), any(), any())) + .thenReturn(Collections.emptyList()); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenCircuitBreakerTrips() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + List failures = + IntStream.range(0, 3) + .mapToObj( + i -> + TableOperationHistoryRow.builder() + .id("fail-" + i) + .tableUuid("uuid-1") + .operationType("ORPHAN_FILES_DELETION") + .submittedAt(Instant.now().minusSeconds(i * 60)) + .status("FAILED") + .build()) + .collect(Collectors.toList()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.getCircuitBreakerThreshold()).thenReturn(3); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(failures); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) + .thenReturn(true); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + List failures = + IntStream.range(0, 3) + .mapToObj( + i -> + TableOperationHistoryRow.builder() + .id("fail-" + i) + .tableUuid("uuid-1") + .operationType("ORPHAN_FILES_DELETION") + .submittedAt(Instant.now().minusSeconds(i * 60)) + .status("FAILED") + .build()) + .collect(Collectors.toList()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(failures); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) + .thenReturn(true); + + runner.analyze(); + + verify(operationsRepo).save(any()); + } +} diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java new file mode 100644 index 000000000..e2ea5ccdd --- /dev/null +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -0,0 +1,242 @@ +package com.linkedin.openhouse.analyzer; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class OrphanFilesDeletionAnalyzerTest { + + private static final Duration SUCCESS_INTERVAL = Duration.ofHours(24); + private static final Duration FAILURE_INTERVAL = Duration.ofHours(1); + private static final Duration SCHEDULED_TIMEOUT = Duration.ofHours(6); + + private OrphanFilesDeletionAnalyzer analyzer; + + @BeforeEach + void setUp() { + analyzer = + new OrphanFilesDeletionAnalyzer( + new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL, SCHEDULED_TIMEOUT)); + } + + // --- isEnabled --- + + @Test + void isEnabled_returnsTrue_whenPropertySet() { + assertThat(analyzer.isEnabled(tableWithProperty("true"))).isTrue(); + } + + @Test + void isEnabled_returnsFalse_whenPropertyAbsent() { + assertThat(analyzer.isEnabled(tableWithProperty(null))).isFalse(); + } + + @Test + void isEnabled_returnsFalse_whenPropertyFalse() { + assertThat(analyzer.isEnabled(tableWithProperty("false"))).isFalse(); + } + + @Test + void isEnabled_returnsFalse_whenTablePropertiesEmpty() { + TableSummary table = TableSummary.builder().tableUuid("uuid").build(); + assertThat(analyzer.isEnabled(table)).isFalse(); + } + + // --- shouldSchedule: no existing op --- + + @Test + void shouldSchedule_noOp_noHistory_returnsTrue() { + assertThat( + analyzer.shouldSchedule(tableWithProperty("true"), Optional.empty(), Optional.empty())) + .isTrue(); + } + + @Test + void shouldSchedule_noOp_successHistoryAfterCooldown_returnsTrue() { + Instant longAgo = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("SUCCESS", longAgo)))) + .isTrue(); + } + + @Test + void shouldSchedule_noOp_successHistoryBeforeCooldown_returnsFalse() { + Instant recent = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("SUCCESS", recent)))) + .isFalse(); + } + + @Test + void shouldSchedule_noOp_failedHistoryAfterRetry_returnsTrue() { + Instant longAgo = Instant.now().minus(FAILURE_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("FAILED", longAgo)))) + .isTrue(); + } + + @Test + void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { + Instant recent = Instant.now().minus(FAILURE_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("FAILED", recent)))) + .isFalse(); + } + + // --- shouldSchedule: PENDING / SCHEDULING --- + + @Test + void shouldSchedule_pending_returnsFalse() { + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("PENDING", null)), + Optional.empty())) + .isFalse(); + } + + @Test + void shouldSchedule_scheduling_returnsFalse() { + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULING", null)), + Optional.empty())) + .isFalse(); + } + + // --- shouldSchedule: SCHEDULED + history --- + + @Test + void shouldSchedule_scheduledNoHistory_withinTimeout_returnsFalse() { + Instant recent = Instant.now().minus(SCHEDULED_TIMEOUT).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", recent)), + Optional.empty())) + .isFalse(); + } + + @Test + void shouldSchedule_scheduledNoHistory_pastTimeout_returnsTrue() { + Instant longAgo = Instant.now().minus(SCHEDULED_TIMEOUT).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", longAgo)), + Optional.empty())) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithNullScheduledAt_noHistory_returnsTrue() { + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", null)), + Optional.empty())) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithSuccessHistory_afterCooldown_returnsTrue() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("SUCCESS", historyAt)))) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithSuccessHistory_beforeCooldown_returnsFalse() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("SUCCESS", historyAt)))) + .isFalse(); + } + + @Test + void shouldSchedule_scheduledWithFailedHistory_afterRetry_returnsTrue() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("FAILED", historyAt)))) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("FAILED", historyAt)))) + .isFalse(); + } + + // --- helpers --- + + private TableSummary tableWithProperty(String value) { + Map props = + value == null + ? Collections.emptyMap() + : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); + return TableSummary.builder() + .tableUuid("test-uuid") + .databaseId("db1") + .tableId("tbl1") + .tableProperties(props) + .build(); + } + + private TableOperationRecord opWithStatus(String status, Instant scheduledAt) { + TableOperationRecord op = new TableOperationRecord(); + op.setStatus(status); + op.setScheduledAt(scheduledAt); + return op; + } + + private TableOperationHistoryRow historyWithStatus(String status, Instant submittedAt) { + return TableOperationHistoryRow.builder() + .id("hist-id") + .tableUuid("test-uuid") + .operationType("ORPHAN_FILES_DELETION") + .submittedAt(submittedAt) + .status(status) + .build(); + } +} diff --git a/settings.gradle b/settings.gradle index 0d64dad53..52873b677 100644 --- a/settings.gradle +++ b/settings.gradle @@ -51,6 +51,7 @@ include ':services:housetables' include ':services:jobs' include ':services:optimizer' include ':apps:optimizer' +include ':apps:optimizer-analyzer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 63b0768b249b29a90470687fc1296d0ef833821b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 7 Apr 2026 11:25:40 -0700 Subject: [PATCH 02/19] fix: address PR review feedback on optimizer-3 analyzer - Rename TableSummary to Table, TableOperationRecord to TableOperation - Add Table.from(TableStatsRow) and TableOperation.from(TableOperationRow) - Add TableOperation.pending(Table, type) factory and toRow() for JPA - Move circuit breaker check into OperationAnalyzer as overridable default - Parameterize analyze() with optional filters (optype, db, table, uuid) - Inline loadOpsMap, loadHistoryMap, remove standalone converter methods - Expand CadencePolicy field javadoc with plain-english examples - Add TODOs: per-db iteration, benchmarking, querybuilder, CB reset Co-Authored-By: Claude Opus 4.6 --- .../openhouse/analyzer/AnalyzerRunner.java | 218 +++++++----------- .../openhouse/analyzer/CadencePolicy.java | 24 +- .../openhouse/analyzer/OperationAnalyzer.java | 33 ++- .../analyzer/OrphanFilesDeletionAnalyzer.java | 10 +- .../openhouse/analyzer/model/Table.java | 42 ++++ .../analyzer/model/TableOperation.java | 91 ++++++++ .../analyzer/model/TableOperationRecord.java | 23 -- .../analyzer/model/TableSummary.java | 26 --- .../analyzer/AnalyzerRunnerTest.java | 45 ++-- .../OrphanFilesDeletionAnalyzerTest.java | 14 +- 10 files changed, 295 insertions(+), 231 deletions(-) create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 5ad568d49..5ad653e97 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,20 +1,16 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; -import java.time.Instant; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -22,12 +18,19 @@ import org.springframework.stereotype.Component; /** - * Core analysis loop. Loads all {@code table_stats} rows and evaluates each table against every + * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * *

The two sides of the join — current operations and circuit-breaker history — are loaded into * memory once per run before the table loop. Both are naturally bounded (only tables with active or * recently failed operations have rows), so holding them in maps is safe at any table scale. + * + *

// TODO: Iterate per-database instead of loading all tables at once. This scopes memory usage + * and allows incremental progress. When we go per-db we may still see 10k tables per iteration, but + * that should be fine. + * + *

// TODO: Add benchmarking and scale tests. Measure memory footprint at 10k tables per + * iteration to validate the in-memory join approach. */ @Slf4j @Component @@ -39,147 +42,90 @@ public class AnalyzerRunner { private final TableOperationsRepository operationsRepo; private final TableOperationHistoryRepository historyRepo; - /** Run the full analysis loop once. */ + /** Run the full analysis loop once with no filters. */ public void analyze() { + analyze(null, null, null, null); + } + + /** + * Run the analysis loop, optionally scoped to a specific operation type, database, table name, or + * table UUID. Pass {@code null} for any parameter to skip that filter. + */ + public void analyze( + String operationType, String databaseName, String tableName, String tableUuid) { + + List activeAnalyzers = + operationType == null + ? analyzers + : analyzers.stream() + .filter(a -> a.getOperationType().equals(operationType)) + .collect(Collectors.toList()); + // Pre-load the small sides of the joins — one query per analyzer type. - Map> opsByType = - analyzers.stream() + // TODO: Move to a query builder (Criteria API or jOOQ) as filter count grows. + Map> opsByType = + activeAnalyzers.stream() .collect( Collectors.toMap( - OperationAnalyzer::getOperationType, a -> loadOpsMap(a.getOperationType()))); + OperationAnalyzer::getOperationType, + a -> + operationsRepo + .find(a.getOperationType(), null, tableUuid, databaseName, tableName) + .stream() + .filter(e -> e.getTableUuid() != null) + .collect( + Collectors.toMap( + TableOperationRow::getTableUuid, + TableOperation::from, + TableOperation::mostRecent)))); Map>> historyByType = - analyzers.stream() + activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, - a -> loadHistoryMap(a.getOperationType()))); - - List tableList = - statsRepo.find(null, null, null).stream() + a -> + historyRepo.find(a.getOperationType(), null, null, null, Pageable.unpaged()) + .stream() + .collect( + Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)))); + + List tables = + statsRepo.find(databaseName, tableName, tableUuid).stream() .filter(row -> row.getTableUuid() != null) + .map(Table::from) .collect(Collectors.toList()); - log.info("Found {} tables in optimizer table_stats", tableList.size()); - - tableList.forEach( - row -> { - TableSummary table = toSummary(row); - analyzers.forEach( - analyzer -> { - String type = analyzer.getOperationType(); - Optional currentOp = - Optional.ofNullable(opsByType.get(type).get(row.getTableUuid())); - List history = - historyByType - .get(type) - .getOrDefault(row.getTableUuid(), Collections.emptyList()); - - Optional latestHistory = history.stream().findFirst(); - - if (analyzer.isEnabled(table) - && analyzer.shouldSchedule(table, currentOp, latestHistory) - && !isCircuitBroken(analyzer, row.getTableUuid(), history)) { - operationsRepo.save(buildOperation(row, type)); - log.info( - "Created PENDING {} operation for table {}.{}", - type, - row.getDatabaseId(), - row.getTableName()); - } - }); - }); + log.info("Found {} tables in optimizer table_stats", tables.size()); + + tables.forEach( + table -> + activeAnalyzers.forEach( + analyzer -> { + if (!analyzer.isEnabled(table)) { + return; + } + + Optional currentOp = + Optional.ofNullable( + opsByType.get(analyzer.getOperationType()).get(table.getTableUuid())); + List history = + historyByType + .get(analyzer.getOperationType()) + .getOrDefault(table.getTableUuid(), Collections.emptyList()); + Optional latestHistory = history.stream().findFirst(); + + if (analyzer.shouldSchedule(table, currentOp, latestHistory) + && !analyzer.isCircuitBroken(table.getTableUuid(), history)) { + TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); + operationsRepo.save(op.toRow()); + log.info( + "Created PENDING {} operation for table {}.{}", + analyzer.getOperationType(), + table.getDatabaseId(), + table.getTableId()); + } + })); log.info("Analysis complete"); } - - /** - * Loads the most recent operation record per table for the given type. Deduplicates by keeping - * the newer row when a table has more than one active record. - */ - private Map loadOpsMap(String operationType) { - Map map = - operationsRepo.find(operationType, null, null, null, null).stream() - .filter(e -> e.getTableUuid() != null) - .collect( - Collectors.toMap( - TableOperationRow::getTableUuid, - AnalyzerRunner::toRecord, - (a, b) -> mostRecent(a, b))); - log.info("Analyzer {} found {} tables with operation history", operationType, map.size()); - return map; - } - - /** - * Loads all history rows for the given type and groups them by {@code tableUuid}, newest first. - * Called once per analyzer type to eliminate per-table N+1 queries in the circuit breaker check. - */ - private Map> loadHistoryMap(String operationType) { - return historyRepo.find(operationType, null, null, null, Pageable.unpaged()).stream() - .collect(Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)); - } - - private TableOperationRow buildOperation(TableStatsRow row, String operationType) { - return TableOperationRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseId()) - .tableName(row.getTableName()) - .operationType(operationType) - .status("PENDING") - .createdAt(Instant.now()) - .version(0L) - .build(); - } - - private TableSummary toSummary(TableStatsRow e) { - return TableSummary.builder() - .tableUuid(e.getTableUuid()) - .databaseId(e.getDatabaseId()) - .tableId(e.getTableName()) - .tableProperties( - e.getTableProperties() != null ? e.getTableProperties() : Collections.emptyMap()) - .stats(e.getStats()) - .build(); - } - - /** - * Returns {@code true} if the circuit breaker has tripped. Uses the pre-loaded history list - * instead of querying the DB per table. - */ - private boolean isCircuitBroken( - OperationAnalyzer analyzer, String tableUuid, List history) { - int threshold = analyzer.getCircuitBreakerThreshold(); - if (threshold <= 0 || history.size() < threshold) { - return false; - } - boolean allFailed = - history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); - if (allFailed) { - log.warn( - "Circuit breaker tripped for table {} operation {}: last {} attempts all FAILED", - tableUuid, - analyzer.getOperationType(), - threshold); - } - return allFailed; - } - - private static TableOperationRecord mostRecent(TableOperationRecord a, TableOperationRecord b) { - Comparator byCreatedAt = - Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); - return byCreatedAt.compare(a, b) >= 0 ? a : b; - } - - private static TableOperationRecord toRecord(TableOperationRow e) { - TableOperationRecord r = new TableOperationRecord(); - r.setId(e.getId()); - r.setTableUuid(e.getTableUuid()); - r.setDatabaseName(e.getDatabaseName()); - r.setTableName(e.getTableName()); - r.setOperationType(e.getOperationType()); - r.setStatus(e.getStatus()); - r.setCreatedAt(e.getCreatedAt()); - r.setScheduledAt(e.getScheduledAt()); - return r; - } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 36d9ff841..a66a7b072 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; import java.time.Instant; @@ -20,8 +20,26 @@ @RequiredArgsConstructor public class CadencePolicy { + /** + * How long to wait after a successful operation before re-evaluating the table. For example, if + * set to 24 hours and OFD succeeded at 10:00 AM Monday, the table won't be scheduled again until + * after 10:00 AM Tuesday. + */ private final Duration successRetryInterval; + + /** + * How long to wait after a failed operation before retrying. Shorter than the success interval to + * allow quick recovery. For example, if set to 1 hour and OFD failed at 2:00 PM, the table + * becomes eligible for retry at 3:00 PM. + */ private final Duration failureRetryInterval; + + /** + * Maximum time a row can stay in SCHEDULED status before the analyzer treats it as stale and + * overwrites it with a new PENDING row. Handles the case where a Spark job crashes without + * reporting back. For example, if set to 6 hours and a job was submitted at noon but never + * completed, the analyzer will re-schedule the table after 6:00 PM. + */ private final Duration scheduledTimeout; /** @@ -31,11 +49,11 @@ public class CadencePolicy { * @param latestHistory the most recent history entry for this (table, type), or empty */ public boolean shouldSchedule( - Optional currentOp, Optional latestHistory) { + Optional currentOp, Optional latestHistory) { if (currentOp.isEmpty()) { return decideFromHistory(latestHistory); } - TableOperationRecord op = currentOp.get(); + TableOperation op = currentOp.get(); switch (op.getStatus()) { case "PENDING": case "SCHEDULING": diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 425fbdbfb..731c8127f 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,8 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.util.List; import java.util.Optional; /** @@ -18,7 +19,7 @@ public interface OperationAnalyzer { * Returns {@code true} if this operation is opted-in for the given table. Tables that return * {@code false} are skipped entirely — no upsert is issued. */ - boolean isEnabled(TableSummary table); + boolean isEnabled(Table table); /** * Returns {@code true} if a new or refreshed operation record should be upserted. @@ -28,8 +29,8 @@ public interface OperationAnalyzer { * @param latestHistory the most recent history entry for this (table, type), or empty */ boolean shouldSchedule( - TableSummary table, - Optional currentOp, + Table table, + Optional currentOp, Optional latestHistory); /** @@ -40,4 +41,26 @@ boolean shouldSchedule( default int getCircuitBreakerThreshold() { return 5; } + + /** + * Returns {@code true} if the circuit breaker has tripped for this table. The default + * implementation checks whether the last N history entries are all FAILED. Individual analyzers + * can override this to implement different strategies (e.g., time-based backoff). + * + *

// TODO: Add circuit breaker reset with exponential backoff so tables can recover + * automatically after a cooldown period instead of staying tripped permanently. + * + *

// TODO: Add a communication path to surface tripped circuit breakers to users (e.g., + * metrics, alerts, or a dashboard query). + * + * @param tableUuid the table whose history to check + * @param history recent history entries for this (table, type), newest first + */ + default boolean isCircuitBroken(String tableUuid, List history) { + int threshold = getCircuitBreakerThreshold(); + if (threshold <= 0 || history.size() < threshold) { + return false; + } + return history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); + } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java index 016057aa4..c348b0265 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; import java.util.Optional; @@ -41,14 +41,14 @@ public String getOperationType() { } @Override - public boolean isEnabled(TableSummary table) { + public boolean isEnabled(Table table) { return "true".equals(table.getTableProperties().get(OFD_ENABLED_PROPERTY)); } @Override public boolean shouldSchedule( - TableSummary table, - Optional currentOp, + Table table, + Optional currentOp, Optional latestHistory) { return cadencePolicy.shouldSchedule(currentOp, latestHistory); } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java new file mode 100644 index 000000000..d170f29dd --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java @@ -0,0 +1,42 @@ +package com.linkedin.openhouse.analyzer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. This is + * the input to the analysis pipeline: analyzers evaluate a {@code Table} and decide whether to + * produce a {@link TableOperation}. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseId; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseId(row.getDatabaseId()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java new file mode 100644 index 000000000..5a81a3848 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -0,0 +1,91 @@ +package com.linkedin.openhouse.analyzer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table. Built either from an existing + * {@link TableOperationRow} (when loading current state) or from a {@link Table} (when creating a + * new PENDING operation). Converts back to a JPA row via {@link #toRow()}. + */ +@Data +@NoArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name, denormalized for display. */ + private String databaseName; + + /** Table name, denormalized for display. */ + private String tableName; + + /** Operation type (e.g., {@code "ORPHAN_FILES_DELETION"}). */ + private String operationType; + + /** Current lifecycle status: PENDING, SCHEDULING, SCHEDULED. */ + private String status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationRow row) { + TableOperation op = new TableOperation(); + op.id = row.getId(); + op.tableUuid = row.getTableUuid(); + op.databaseName = row.getDatabaseName(); + op.tableName = row.getTableName(); + op.operationType = row.getOperationType(); + op.status = row.getStatus(); + op.createdAt = row.getCreatedAt(); + op.scheduledAt = row.getScheduledAt(); + return op; + } + + /** Create a new PENDING operation for the given table and operation type. */ + public static TableOperation pending(Table table, String operationType) { + TableOperation op = new TableOperation(); + op.id = UUID.randomUUID().toString(); + op.tableUuid = table.getTableUuid(); + op.databaseName = table.getDatabaseId(); + op.tableName = table.getTableId(); + op.operationType = operationType; + op.status = "PENDING"; + op.createdAt = Instant.now(); + return op; + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationRow toRow() { + return TableOperationRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType) + .status(status) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java deleted file mode 100644 index 51bc4d803..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import java.time.Instant; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Lightweight representation of an active table operation record. Mirrors the fields in {@link - * com.linkedin.openhouse.optimizer.entity.TableOperationRow} that the Analyzer needs. - */ -@Data -@NoArgsConstructor -public class TableOperationRecord { - - private String id; - private String tableUuid; - private String databaseName; - private String tableName; - private String operationType; - private String status; - private Instant createdAt; - private Instant scheduledAt; -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java deleted file mode 100644 index fbe166fff..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import java.util.Collections; -import java.util.Map; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Internal representation of a table, decoupled from any external API response model. */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableSummary { - - private String tableUuid; - private String databaseId; - private String tableId; - - @Builder.Default private Map tableProperties = Collections.emptyMap(); - - /** Commit stats from the optimizer {@code table_stats} table. Null if no stats recorded yet. */ - private TableStats stats; -} diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 69de877a2..29fd20e50 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -7,8 +7,8 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; @@ -51,11 +51,12 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); @@ -85,8 +86,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); TableOperationRow existingEntity = new TableOperationRow(); existingEntity.setId("existing-op-id"); @@ -103,13 +104,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperationRecord existingRecord = new TableOperationRecord(); - existingRecord.setId("existing-op-id"); - existingRecord.setStatus("PENDING"); - existingRecord.setTableUuid("uuid-1"); - existingRecord.setOperationType("ORPHAN_FILES_DELETION"); - existingRecord.setCreatedAt(existingEntity.getCreatedAt()); - when(analyzer.shouldSchedule(expectedTable, Optional.of(existingRecord), Optional.empty())) + TableOperation existingOp = TableOperation.from(existingEntity); + when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); runner.analyze(); @@ -122,7 +118,7 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); @@ -142,7 +138,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").build(); TableOperationRow scheduled = new TableOperationRow(); scheduled.setId("op-id"); @@ -159,13 +155,8 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperationRecord scheduledRecord = new TableOperationRecord(); - scheduledRecord.setId("op-id"); - scheduledRecord.setStatus("SCHEDULED"); - scheduledRecord.setTableUuid("uuid-1"); - scheduledRecord.setOperationType("ORPHAN_FILES_DELETION"); - scheduledRecord.setCreatedAt(scheduled.getCreatedAt()); - when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledRecord), Optional.empty())) + TableOperation scheduledOp = TableOperation.from(scheduled); + when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); runner.analyze(); @@ -197,8 +188,8 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -215,6 +206,7 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); when(analyzer.getCircuitBreakerThreshold()).thenReturn(3); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); @@ -236,8 +228,8 @@ void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -254,6 +246,7 @@ void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java index e2ea5ccdd..171846ff8 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; import java.time.Instant; @@ -47,7 +47,7 @@ void isEnabled_returnsFalse_whenPropertyFalse() { @Test void isEnabled_returnsFalse_whenTablePropertiesEmpty() { - TableSummary table = TableSummary.builder().tableUuid("uuid").build(); + Table table = Table.builder().tableUuid("uuid").build(); assertThat(analyzer.isEnabled(table)).isFalse(); } @@ -210,12 +210,12 @@ void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { // --- helpers --- - private TableSummary tableWithProperty(String value) { + private Table tableWithProperty(String value) { Map props = value == null ? Collections.emptyMap() : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); - return TableSummary.builder() + return Table.builder() .tableUuid("test-uuid") .databaseId("db1") .tableId("tbl1") @@ -223,8 +223,8 @@ private TableSummary tableWithProperty(String value) { .build(); } - private TableOperationRecord opWithStatus(String status, Instant scheduledAt) { - TableOperationRecord op = new TableOperationRecord(); + private TableOperation opWithStatus(String status, Instant scheduledAt) { + TableOperation op = new TableOperation(); op.setStatus(status); op.setScheduledAt(scheduledAt); return op; From 11dd115b3c72b637d5ff7e7232be7b20dcb8704c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:25:55 -0700 Subject: [PATCH 03/19] fix(optimizer): propagate optimizer-0 renames into apps/optimizer + analyzer - apps/optimizer shared module: rename databaseId -> databaseName in TableStatsRow + TableStatsRepository; submittedAt -> completedAt in TableOperationHistoryRow + TableOperationHistoryRepository; tighten database_name and table_name from VARCHAR(255) to VARCHAR(128). - Analyzer Table model: rename databaseId -> databaseName so the domain object matches the underlying entity. Update Table.from factory and downstream usages in AnalyzerRunner, TableOperation, and CadencePolicy (which now reads completedAt off history rows). - Analyzer tests: update Table builder and TableOperationHistoryRow builder usages to the renamed fields. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 2 +- .../openhouse/analyzer/CadencePolicy.java | 4 ++-- .../openhouse/analyzer/model/Table.java | 4 ++-- .../analyzer/model/TableOperation.java | 2 +- .../analyzer/AnalyzerRunnerTest.java | 20 +++++++++---------- .../OrphanFilesDeletionAnalyzerTest.java | 6 +++--- .../entity/TableOperationHistoryRow.java | 4 ++-- .../optimizer/entity/TableOperationRow.java | 4 ++-- .../optimizer/entity/TableStatsRow.java | 6 +++--- .../TableOperationHistoryRepository.java | 6 +++--- .../repository/TableStatsRepository.java | 4 ++-- 11 files changed, 31 insertions(+), 31 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 5ad653e97..343a0712e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -121,7 +121,7 @@ public void analyze( log.info( "Created PENDING {} operation for table {}.{}", analyzer.getOperationType(), - table.getDatabaseId(), + table.getDatabaseName(), table.getTableId()); } })); diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index a66a7b072..0590c2045 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -78,9 +78,9 @@ private boolean decideFromHistory(Optional latestHisto private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { switch (entry.getStatus()) { case "SUCCESS": - return pastInterval(entry.getSubmittedAt(), successRetryInterval); + return pastInterval(entry.getCompletedAt(), successRetryInterval); case "FAILED": - return pastInterval(entry.getSubmittedAt(), failureRetryInterval); + return pastInterval(entry.getCompletedAt(), failureRetryInterval); default: return true; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java index d170f29dd..45e02fd60 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java @@ -21,7 +21,7 @@ public class Table { private String tableUuid; - private String databaseId; + private String databaseName; private String tableId; @Builder.Default private Map tableProperties = Collections.emptyMap(); @@ -32,7 +32,7 @@ public class Table { public static Table from(TableStatsRow row) { return Table.builder() .tableUuid(row.getTableUuid()) - .databaseId(row.getDatabaseId()) + .databaseName(row.getDatabaseName()) .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java index 5a81a3848..97f4b9f96 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -59,7 +59,7 @@ public static TableOperation pending(Table table, String operationType) { TableOperation op = new TableOperation(); op.id = UUID.randomUUID().toString(); op.tableUuid = table.getTableUuid(); - op.databaseName = table.getDatabaseId(); + op.databaseName = table.getDatabaseName(); op.tableName = table.getTableId(); op.operationType = operationType; op.status = "PENDING"; diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 29fd20e50..2f3eee3d7 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -48,11 +48,11 @@ void setUp() { void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); @@ -83,11 +83,11 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); TableOperationRow existingEntity = new TableOperationRow(); existingEntity.setId("existing-op-id"); @@ -185,11 +185,11 @@ void analyze_skipsTable_whenTableUuidIsNull() { void analyze_skipsTable_whenCircuitBreakerTrips() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -199,7 +199,7 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { .id("fail-" + i) .tableUuid("uuid-1") .operationType("ORPHAN_FILES_DELETION") - .submittedAt(Instant.now().minusSeconds(i * 60)) + .completedAt(Instant.now().minusSeconds(i * 60)) .status("FAILED") .build()) .collect(Collectors.toList()); @@ -225,11 +225,11 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -239,7 +239,7 @@ void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { .id("fail-" + i) .tableUuid("uuid-1") .operationType("ORPHAN_FILES_DELETION") - .submittedAt(Instant.now().minusSeconds(i * 60)) + .completedAt(Instant.now().minusSeconds(i * 60)) .status("FAILED") .build()) .collect(Collectors.toList()); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java index 171846ff8..f0e915059 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -217,7 +217,7 @@ private Table tableWithProperty(String value) { : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); return Table.builder() .tableUuid("test-uuid") - .databaseId("db1") + .databaseName("db1") .tableId("tbl1") .tableProperties(props) .build(); @@ -230,12 +230,12 @@ private TableOperation opWithStatus(String status, Instant scheduledAt) { return op; } - private TableOperationHistoryRow historyWithStatus(String status, Instant submittedAt) { + private TableOperationHistoryRow historyWithStatus(String status, Instant completedAt) { return TableOperationHistoryRow.builder() .id("hist-id") .tableUuid("test-uuid") .operationType("ORPHAN_FILES_DELETION") - .submittedAt(submittedAt) + .completedAt(completedAt) .status(status) .build(); } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e638e2e1..b05df0f1c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -29,8 +29,8 @@ public class TableOperationHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private String operationType; - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; @Column(name = "status", nullable = false, length = 20) private String status; diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java index fc0104604..33a83bd3f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -28,10 +28,10 @@ public class TableOperationRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Column(name = "operation_type", nullable = false, length = 50) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 5cdf16a97..bc647d86e 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -34,10 +34,10 @@ public class TableStatsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index f2ea9e3c8..fd9edd1f4 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -13,7 +13,7 @@ public interface TableOperationHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. */ @Query( @@ -21,8 +21,8 @@ public interface TableOperationHistoryRepository + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "ORDER BY r.completedAt DESC") List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6effe19c2..50f515d07 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } From 8054586519bab86fbabc0591e02ab141dff3cbfb Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:15:53 -0700 Subject: [PATCH 04/19] refactor(optimizer-analyzer): delete unused AnalyzerConfig The empty @Configuration class did nothing. @SpringBootApplication on AnalyzerApplication already triggers @ComponentScan, which discovers all @Component-annotated beans without help. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../linkedin/openhouse/analyzer/config/AnalyzerConfig.java | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java deleted file mode 100644 index 30ad9f55b..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java +++ /dev/null @@ -1,7 +0,0 @@ -package com.linkedin.openhouse.analyzer.config; - -import org.springframework.context.annotation.Configuration; - -/** Spring configuration for the Analyzer. */ -@Configuration -public class AnalyzerConfig {} From 5af5f14ad32815d073ce3dc258abfbf77ce11620 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:18:11 -0700 Subject: [PATCH 05/19] refactor(optimizer-analyzer): remove circuit breaker, defer with TODO The circuit breaker was hardcoded (threshold=5, no reset, no operator visibility) and forced the AnalyzerRunner to materialize the full history of every (table, operation_type) just to check the last N rows. Cadence policy only needs the single latest history entry; pulling everything was wasted I/O. Changes: - Remove getCircuitBreakerThreshold and isCircuitBroken from OperationAnalyzer. - Add a TODO documenting requirements for the eventual replacement (configurable threshold, exponential-backoff reset, operator-visible signal). - In AnalyzerRunner, fold history loading into a per-(uuid, type) map holding only the most-recent entry; drop the per-table history list and the isCircuitBroken call. - Add a TODO to switch the history scan to a windowed query that returns at most one row per (uuid, type). - Drop the two circuit-breaker tests from AnalyzerRunnerTest. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 47 ++++++---- .../openhouse/analyzer/OperationAnalyzer.java | 38 ++------- .../analyzer/AnalyzerRunnerTest.java | 85 ------------------- 3 files changed, 36 insertions(+), 134 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 343a0712e..e48bb241e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -7,7 +7,8 @@ import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; -import java.util.Collections; +import java.time.Instant; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -21,16 +22,15 @@ * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * - *

The two sides of the join — current operations and circuit-breaker history — are loaded into - * memory once per run before the table loop. Both are naturally bounded (only tables with active or - * recently failed operations have rows), so holding them in maps is safe at any table scale. + *

Both sides of the join — current operations and latest history per (table, type) — are loaded + * into maps once per run before the table loop. Both are bounded by the number of tables, so + * holding them in memory is safe at any realistic scale. * *

// TODO: Iterate per-database instead of loading all tables at once. This scopes memory usage - * and allows incremental progress. When we go per-db we may still see 10k tables per iteration, but - * that should be fine. + * and allows incremental progress. * - *

// TODO: Add benchmarking and scale tests. Measure memory footprint at 10k tables per - * iteration to validate the in-memory join approach. + *

// TODO: Benchmark memory footprint at 10k tables per iteration to validate the in-memory join + * approach. */ @Slf4j @Component @@ -79,7 +79,10 @@ public void analyze( TableOperation::from, TableOperation::mostRecent)))); - Map>> historyByType = + // TODO(perf): replace this full-history scan with a windowed query that returns at most one + // row per (table_uuid, operation_type) — the analyzer only consumes the latest entry. Today + // this is O(H) per analyzer where H is total history rows; bounded but unnecessary. + Map> latestHistoryByType = activeAnalyzers.stream() .collect( Collectors.toMap( @@ -87,8 +90,12 @@ public void analyze( a -> historyRepo.find(a.getOperationType(), null, null, null, Pageable.unpaged()) .stream() + .filter(r -> r.getTableUuid() != null) .collect( - Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)))); + Collectors.toMap( + TableOperationHistoryRow::getTableUuid, + r -> r, + AnalyzerRunner::moreRecentHistory)))); List

tables = statsRepo.find(databaseName, tableName, tableUuid).stream() @@ -108,14 +115,13 @@ public void analyze( Optional currentOp = Optional.ofNullable( opsByType.get(analyzer.getOperationType()).get(table.getTableUuid())); - List history = - historyByType - .get(analyzer.getOperationType()) - .getOrDefault(table.getTableUuid(), Collections.emptyList()); - Optional latestHistory = history.stream().findFirst(); + Optional latestHistory = + Optional.ofNullable( + latestHistoryByType + .get(analyzer.getOperationType()) + .get(table.getTableUuid())); - if (analyzer.shouldSchedule(table, currentOp, latestHistory) - && !analyzer.isCircuitBroken(table.getTableUuid(), history)) { + if (analyzer.shouldSchedule(table, currentOp, latestHistory)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); operationsRepo.save(op.toRow()); log.info( @@ -128,4 +134,11 @@ public void analyze( log.info("Analysis complete"); } + + private static TableOperationHistoryRow moreRecentHistory( + TableOperationHistoryRow a, TableOperationHistoryRow b) { + Comparator byCompletedAt = + Comparator.comparing(r -> r.getCompletedAt() != null ? r.getCompletedAt() : Instant.EPOCH); + return byCompletedAt.compare(a, b) >= 0 ? a : b; + } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 731c8127f..0d5fb6770 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -3,12 +3,17 @@ import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; -import java.util.List; import java.util.Optional; /** * Strategy interface for a single operation type. Each implementation decides whether a given table * needs an operation recommendation upserted in the Optimizer Service. + * + *

// TODO(circuit-breaker): a chronically-failing table currently produces a new PENDING row on + * every Analyzer pass. Add a circuit breaker that suppresses scheduling for a (table, type) after N + * consecutive FAILED history entries. Requirements: configurable threshold per operation type, + * automatic reset via exponential backoff so tables can recover, and an operator-visible signal + * (metric or query) so tripped breakers are diagnosable. */ public interface OperationAnalyzer { @@ -32,35 +37,4 @@ boolean shouldSchedule( Table table, Optional currentOp, Optional latestHistory); - - /** - * Maximum number of consecutive FAILED history entries before the circuit breaker trips and - * scheduling is suppressed for this (table, operation_type). Override per operation type. Returns - * 0 to disable the circuit breaker. - */ - default int getCircuitBreakerThreshold() { - return 5; - } - - /** - * Returns {@code true} if the circuit breaker has tripped for this table. The default - * implementation checks whether the last N history entries are all FAILED. Individual analyzers - * can override this to implement different strategies (e.g., time-based backoff). - * - *

// TODO: Add circuit breaker reset with exponential backoff so tables can recover - * automatically after a cooldown period instead of staying tripped permanently. - * - *

// TODO: Add a communication path to surface tripped circuit breakers to users (e.g., - * metrics, alerts, or a dashboard query). - * - * @param tableUuid the table whose history to check - * @param history recent history entries for this (table, type), newest first - */ - default boolean isCircuitBroken(String tableUuid, List history) { - int threshold = getCircuitBreakerThreshold(); - if (threshold <= 0 || history.size() < threshold) { - return false; - } - return history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); - } } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 2f3eee3d7..ad6938633 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -9,7 +9,6 @@ import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; -import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; @@ -19,8 +18,6 @@ import java.util.Collections; import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -56,8 +53,6 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); - when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) @@ -180,84 +175,4 @@ void analyze_skipsTable_whenTableUuidIsNull() { verify(operationsRepo, never()).save(any()); } - - @Test - void analyze_skipsTable_whenCircuitBreakerTrips() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); - statsEntity.setTableName("tbl1"); - - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); - - List failures = - IntStream.range(0, 3) - .mapToObj( - i -> - TableOperationHistoryRow.builder() - .id("fail-" + i) - .tableUuid("uuid-1") - .operationType("ORPHAN_FILES_DELETION") - .completedAt(Instant.now().minusSeconds(i * 60)) - .status("FAILED") - .build()) - .collect(Collectors.toList()); - - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); - when(analyzer.getCircuitBreakerThreshold()).thenReturn(3); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) - .thenReturn(failures); - when(analyzer.isEnabled(expectedTable)).thenReturn(true); - when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) - .thenReturn(true); - - runner.analyze(); - - verify(operationsRepo, never()).save(any()); - } - - @Test - void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); - statsEntity.setTableName("tbl1"); - - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); - - List failures = - IntStream.range(0, 3) - .mapToObj( - i -> - TableOperationHistoryRow.builder() - .id("fail-" + i) - .tableUuid("uuid-1") - .operationType("ORPHAN_FILES_DELETION") - .completedAt(Instant.now().minusSeconds(i * 60)) - .status("FAILED") - .build()) - .collect(Collectors.toList()); - - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); - when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) - .thenReturn(failures); - when(analyzer.isEnabled(expectedTable)).thenReturn(true); - when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) - .thenReturn(true); - - runner.analyze(); - - verify(operationsRepo).save(any()); - } } From c4f194ac19bc57a88c7337b942ad7282ebde4f80 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:21:40 -0700 Subject: [PATCH 06/19] perf(optimizer-analyzer): use findLatestPerTable for history lookup Switch the AnalyzerRunner from scanning every history row per analyzer pass to the dedicated findLatestPerTable query (added in apps/optimizer). The analyzer only consumes the latest entry per (table_uuid, operation_type); the previous full-history scan was bounded but unnecessary I/O. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../linkedin/openhouse/analyzer/AnalyzerRunner.java | 9 +++------ .../openhouse/analyzer/AnalyzerRunnerTest.java | 12 +++++------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index e48bb241e..b4ca06966 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -15,7 +15,6 @@ import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Component; /** @@ -79,17 +78,15 @@ public void analyze( TableOperation::from, TableOperation::mostRecent)))); - // TODO(perf): replace this full-history scan with a windowed query that returns at most one - // row per (table_uuid, operation_type) — the analyzer only consumes the latest entry. Today - // this is O(H) per analyzer where H is total history rows; bounded but unnecessary. + // Latest history row per (table_uuid, operation_type), one query per analyzer. The repo query + // may return tied rows for the same key on identical completed_at; dedupe in memory. Map> latestHistoryByType = activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, a -> - historyRepo.find(a.getOperationType(), null, null, null, Pageable.unpaged()) - .stream() + historyRepo.findLatestPerTable(a.getOperationType()).stream() .filter(r -> r.getTableUuid() != null) .collect( Collectors.toMap( diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index ad6938633..8c8bb8145 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -24,7 +24,6 @@ import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import org.springframework.data.domain.Pageable; @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { @@ -55,7 +54,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) @@ -95,7 +94,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(List.of(existingEntity)); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -119,7 +118,7 @@ void analyze_skipsTable_whenNotEnabled() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); @@ -146,7 +145,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(List.of(scheduled)); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -168,8 +167,7 @@ void analyze_skipsTable_whenTableUuidIsNull() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); - when(historyRepo.find(anyString(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); runner.analyze(); From 6da624ab4836f7d249fa712a1676924939d26137 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:31:24 -0700 Subject: [PATCH 07/19] refactor(optimizer-analyzer): typed OperationType/Status, polish cadence + TableOperation The analyzer was using raw Strings everywhere for operation type and status. Per-layer types: introduce analyzer-internal OperationType and OperationStatus enums in apps/optimizer-analyzer/model and convert at the entity boundary. The wire API (services/optimizer/api/model) and DB columns (apps/optimizer entity rows) keep their own representations and are unaffected. Changes: - New enums OperationType and OperationStatus in the analyzer model package. - TableOperation: operationType and status become enums. from(row) parses the String columns; toRow() emits .name() back. from() and pending() share a private build() factory. - TableOperation javadocs: drop "denormalized for display" wording. - OperationAnalyzer.getOperationType returns OperationType. - AnalyzerRunner: filter parameter and per-type maps are keyed on OperationType; calls to repos still pass the String .name(). - CadencePolicy.shouldSchedule: switch on OperationStatus is exhaustive (now including CANCELED), unknown values throw IllegalStateException, and the SCHEDULED branch has an inline comment explaining the two cases. - OrphanFilesDeletionAnalyzer: returns the enum. - Tests updated to construct enum values; OFD test helper takes the enum. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 18 +++-- .../openhouse/analyzer/CadencePolicy.java | 28 ++++--- .../openhouse/analyzer/OperationAnalyzer.java | 5 +- .../analyzer/OrphanFilesDeletionAnalyzer.java | 8 +- .../analyzer/model/OperationStatus.java | 14 ++++ .../analyzer/model/OperationType.java | 10 +++ .../analyzer/model/TableOperation.java | 76 ++++++++++++------- .../analyzer/AnalyzerRunnerTest.java | 46 +++++------ .../OrphanFilesDeletionAnalyzerTest.java | 21 ++--- 9 files changed, 138 insertions(+), 88 deletions(-) create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index b4ca06966..22da50d01 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -51,25 +52,30 @@ public void analyze() { * table UUID. Pass {@code null} for any parameter to skip that filter. */ public void analyze( - String operationType, String databaseName, String tableName, String tableUuid) { + OperationType operationType, String databaseName, String tableName, String tableUuid) { List activeAnalyzers = operationType == null ? analyzers : analyzers.stream() - .filter(a -> a.getOperationType().equals(operationType)) + .filter(a -> a.getOperationType() == operationType) .collect(Collectors.toList()); // Pre-load the small sides of the joins — one query per analyzer type. // TODO: Move to a query builder (Criteria API or jOOQ) as filter count grows. - Map> opsByType = + Map> opsByType = activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, a -> operationsRepo - .find(a.getOperationType(), null, tableUuid, databaseName, tableName) + .find( + a.getOperationType().name(), + null, + tableUuid, + databaseName, + tableName) .stream() .filter(e -> e.getTableUuid() != null) .collect( @@ -80,13 +86,13 @@ public void analyze( // Latest history row per (table_uuid, operation_type), one query per analyzer. The repo query // may return tied rows for the same key on identical completed_at; dedupe in memory. - Map> latestHistoryByType = + Map> latestHistoryByType = activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, a -> - historyRepo.findLatestPerTable(a.getOperationType()).stream() + historyRepo.findLatestPerTable(a.getOperationType().name()).stream() .filter(r -> r.getTableUuid() != null) .collect( Collectors.toMap( diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 0590c2045..4cf892021 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -55,16 +55,22 @@ public boolean shouldSchedule( } TableOperation op = currentOp.get(); switch (op.getStatus()) { - case "PENDING": - case "SCHEDULING": + case PENDING: + case SCHEDULING: return false; - case "SCHEDULED": - if (latestHistory.isEmpty()) { - return pastInterval(op.getScheduledAt(), scheduledTimeout); - } - return decideFromHistoryEntry(latestHistory.get()); + case SCHEDULED: + // Two scenarios for a SCHEDULED row: + // - no history yet: the job is still running (or crashed); fall through to the + // scheduledTimeout safety net. + // - history present: the job completed and history was written; defer to cadence policy + // on the history entry. + return latestHistory.isPresent() + ? decideFromHistory(latestHistory) + : pastInterval(op.getScheduledAt(), scheduledTimeout); + case CANCELED: + return decideFromHistory(latestHistory); default: - return true; + throw new IllegalStateException("Unhandled operation status: " + op.getStatus()); } } @@ -72,10 +78,7 @@ private boolean decideFromHistory(Optional latestHisto if (latestHistory.isEmpty()) { return true; } - return decideFromHistoryEntry(latestHistory.get()); - } - - private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { + TableOperationHistoryRow entry = latestHistory.get(); switch (entry.getStatus()) { case "SUCCESS": return pastInterval(entry.getCompletedAt(), successRetryInterval); @@ -86,6 +89,7 @@ private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { } } + /** {@code true} if {@code timestamp} is null or {@code interval} has elapsed since then. */ private boolean pastInterval(Instant timestamp, Duration interval) { return timestamp == null || Duration.between(timestamp, Instant.now()).compareTo(interval) > 0; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 0d5fb6770..33f2b8e5d 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -17,8 +18,8 @@ */ public interface OperationAnalyzer { - /** The operation type this analyzer handles (e.g., {@code "ORPHAN_FILES_DELETION"}). */ - String getOperationType(); + /** The operation type this analyzer handles. */ + OperationType getOperationType(); /** * Returns {@code true} if this operation is opted-in for the given table. Tables that return diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java index c348b0265..450fda293 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -9,11 +10,10 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -/** Analyzer for the {@code ORPHAN_FILES_DELETION} operation type. */ +/** Analyzer for the {@link OperationType#ORPHAN_FILES_DELETION} operation type. */ @Component public class OrphanFilesDeletionAnalyzer implements OperationAnalyzer { - static final String OPERATION_TYPE = "ORPHAN_FILES_DELETION"; static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled"; private final CadencePolicy cadencePolicy; @@ -36,8 +36,8 @@ public OrphanFilesDeletionAnalyzer( } @Override - public String getOperationType() { - return OPERATION_TYPE; + public OperationType getOperationType() { + return OperationType.ORPHAN_FILES_DELETION; } @Override diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java new file mode 100644 index 000000000..8a2d1d541 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java @@ -0,0 +1,14 @@ +package com.linkedin.openhouse.analyzer.model; + +/** + * Analyzer-internal lifecycle states. The analyzer only writes {@link #PENDING}; the other values + * are read off existing rows when deciding whether to re-issue a recommendation. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java new file mode 100644 index 000000000..da48bb459 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.analyzer.model; + +/** + * Analyzer-internal enum for the operation types this app knows how to schedule. Intentionally + * separate from the wire-API and DB representations so the analyzer can evolve its set of supported + * operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java index 97f4b9f96..54e569b6a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -22,17 +22,17 @@ public class TableOperation { /** The table this operation targets. */ private String tableUuid; - /** Database name, denormalized for display. */ + /** Database name. */ private String databaseName; - /** Table name, denormalized for display. */ + /** Table name. */ private String tableName; - /** Operation type (e.g., {@code "ORPHAN_FILES_DELETION"}). */ - private String operationType; + /** Operation type. */ + private OperationType operationType; - /** Current lifecycle status: PENDING, SCHEDULING, SCHEDULED. */ - private String status; + /** Current lifecycle status. */ + private OperationStatus status; /** When this operation record was created. */ private Instant createdAt; @@ -42,29 +42,28 @@ public class TableOperation { /** Build a {@code TableOperation} from an existing JPA row. */ public static TableOperation from(TableOperationRow row) { - TableOperation op = new TableOperation(); - op.id = row.getId(); - op.tableUuid = row.getTableUuid(); - op.databaseName = row.getDatabaseName(); - op.tableName = row.getTableName(); - op.operationType = row.getOperationType(); - op.status = row.getStatus(); - op.createdAt = row.getCreatedAt(); - op.scheduledAt = row.getScheduledAt(); - return op; + return build( + row.getId(), + row.getTableUuid(), + row.getDatabaseName(), + row.getTableName(), + OperationType.valueOf(row.getOperationType()), + OperationStatus.valueOf(row.getStatus()), + row.getCreatedAt(), + row.getScheduledAt()); } /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, String operationType) { - TableOperation op = new TableOperation(); - op.id = UUID.randomUUID().toString(); - op.tableUuid = table.getTableUuid(); - op.databaseName = table.getDatabaseName(); - op.tableName = table.getTableId(); - op.operationType = operationType; - op.status = "PENDING"; - op.createdAt = Instant.now(); - return op; + public static TableOperation pending(Table table, OperationType operationType) { + return build( + UUID.randomUUID().toString(), + table.getTableUuid(), + table.getDatabaseName(), + table.getTableId(), + operationType, + OperationStatus.PENDING, + Instant.now(), + null); } /** Convert to a JPA entity for persistence. */ @@ -74,8 +73,8 @@ public TableOperationRow toRow() { .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .operationType(operationType) - .status(status) + .operationType(operationType.name()) + .status(status.name()) .createdAt(createdAt) .scheduledAt(scheduledAt) .version(0L) @@ -88,4 +87,25 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } + + private static TableOperation build( + String id, + String tableUuid, + String databaseName, + String tableName, + OperationType operationType, + OperationStatus status, + Instant createdAt, + Instant scheduledAt) { + TableOperation op = new TableOperation(); + op.id = id; + op.tableUuid = tableUuid; + op.databaseName = databaseName; + op.tableName = tableName; + op.operationType = operationType; + op.status = status; + op.createdAt = createdAt; + op.scheduledAt = scheduledAt; + return op; + } } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 8c8bb8145..1feafba29 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -7,6 +7,7 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; @@ -28,6 +29,8 @@ @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { + private static final String OFD = OperationType.ORPHAN_FILES_DELETION.name(); + @Mock private TableStatsRepository statsRepo; @Mock private TableOperationsRepository operationsRepo; @Mock private TableOperationHistoryRepository historyRepo; @@ -51,11 +54,9 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); @@ -68,7 +69,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); assertThat(saved.getDatabaseName()).isEqualTo("db1"); assertThat(saved.getTableName()).isEqualTo("tbl1"); - assertThat(saved.getOperationType()).isEqualTo("ORPHAN_FILES_DELETION"); + assertThat(saved.getOperationType()).isEqualTo(OFD); assertThat(saved.getStatus()).isEqualTo("PENDING"); assertThat(saved.getId()).isNotNull(); } @@ -87,15 +88,13 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { existingEntity.setId("existing-op-id"); existingEntity.setStatus("PENDING"); existingEntity.setTableUuid("uuid-1"); - existingEntity.setOperationType("ORPHAN_FILES_DELETION"); + existingEntity.setOperationType(OFD); existingEntity.setCreatedAt(Instant.now()); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(List.of(existingEntity)); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(existingEntity)); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); TableOperation existingOp = TableOperation.from(existingEntity); @@ -115,11 +114,9 @@ void analyze_skipsTable_whenNotEnabled() { Table expectedTable = Table.builder().tableUuid("uuid-1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); runner.analyze(); @@ -138,15 +135,13 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { scheduled.setId("op-id"); scheduled.setStatus("SCHEDULED"); scheduled.setTableUuid("uuid-1"); - scheduled.setOperationType("ORPHAN_FILES_DELETION"); + scheduled.setOperationType(OFD); scheduled.setCreatedAt(Instant.now()); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(List.of(scheduled)); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(scheduled)); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); TableOperation scheduledOp = TableOperation.from(scheduled); @@ -164,9 +159,8 @@ void analyze_skipsTable_whenTableUuidIsNull() { statsEntity.setTableUuid(null); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); runner.analyze(); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java index f0e915059..50d426eef 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -2,6 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; +import com.linkedin.openhouse.analyzer.model.OperationStatus; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -111,7 +112,7 @@ void shouldSchedule_pending_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("PENDING", null)), + Optional.of(opWithStatus(OperationStatus.PENDING, null)), Optional.empty())) .isFalse(); } @@ -121,7 +122,7 @@ void shouldSchedule_scheduling_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULING", null)), + Optional.of(opWithStatus(OperationStatus.SCHEDULING, null)), Optional.empty())) .isFalse(); } @@ -134,7 +135,7 @@ void shouldSchedule_scheduledNoHistory_withinTimeout_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", recent)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, recent)), Optional.empty())) .isFalse(); } @@ -145,7 +146,7 @@ void shouldSchedule_scheduledNoHistory_pastTimeout_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", longAgo)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, longAgo)), Optional.empty())) .isTrue(); } @@ -155,7 +156,7 @@ void shouldSchedule_scheduledWithNullScheduledAt_noHistory_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", null)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, null)), Optional.empty())) .isTrue(); } @@ -167,7 +168,7 @@ void shouldSchedule_scheduledWithSuccessHistory_afterCooldown_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("SUCCESS", historyAt)))) .isTrue(); } @@ -179,7 +180,7 @@ void shouldSchedule_scheduledWithSuccessHistory_beforeCooldown_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("SUCCESS", historyAt)))) .isFalse(); } @@ -191,7 +192,7 @@ void shouldSchedule_scheduledWithFailedHistory_afterRetry_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("FAILED", historyAt)))) .isTrue(); } @@ -203,7 +204,7 @@ void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("FAILED", historyAt)))) .isFalse(); } @@ -223,7 +224,7 @@ private Table tableWithProperty(String value) { .build(); } - private TableOperation opWithStatus(String status, Instant scheduledAt) { + private TableOperation opWithStatus(OperationStatus status, Instant scheduledAt) { TableOperation op = new TableOperation(); op.setStatus(status); op.setScheduledAt(scheduledAt); From 52ba8583781971a57c080a56affa881a88f0d7f4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:32:43 -0700 Subject: [PATCH 08/19] =?UTF-8?q?refactor(optimizer-analyzer):=20rename=20?= =?UTF-8?q?OrphanFilesDeletionAnalyzer=20=E2=86=92=20CadenceBasedOrphanFil?= =?UTF-8?q?esDeletionAnalyzer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The class composes CadencePolicy and is one of potentially many strategies (volume-based, schema-aware, etc.) we could write later for the same operation type. Encode the scheduling driver in the class name so the distinction is visible at registration. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...Analyzer.java => CadenceBasedOrphanFilesDeletionAnalyzer.java} | 0 ...Test.java => CadenceBasedOrphanFilesDeletionAnalyzerTest.java} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/{OrphanFilesDeletionAnalyzer.java => CadenceBasedOrphanFilesDeletionAnalyzer.java} (100%) rename apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/{OrphanFilesDeletionAnalyzerTest.java => CadenceBasedOrphanFilesDeletionAnalyzerTest.java} (100%) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java similarity index 100% rename from apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java rename to apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java similarity index 100% rename from apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java rename to apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java From beedad8ccc57c425b2e3e3ba6ec59d232b5f538d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:33:10 -0700 Subject: [PATCH 09/19] fix(optimizer-analyzer): update class name inside renamed files The rename in the previous commit moved the files but did not change the class identifiers inside. Update both class declarations and the constructor calls in the test to match the new file name. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java | 6 +++--- .../CadenceBasedOrphanFilesDeletionAnalyzerTest.java | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index 450fda293..c50025b6a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -12,14 +12,14 @@ /** Analyzer for the {@link OperationType#ORPHAN_FILES_DELETION} operation type. */ @Component -public class OrphanFilesDeletionAnalyzer implements OperationAnalyzer { +public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyzer { static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled"; private final CadencePolicy cadencePolicy; @Autowired - public OrphanFilesDeletionAnalyzer( + public CadenceBasedOrphanFilesDeletionAnalyzer( @Value("${ofd.success-retry-hours:24}") long successRetryHours, @Value("${ofd.failure-retry-hours:1}") long failureRetryHours, @Value("${ofd.scheduled-timeout-hours:6}") long scheduledTimeoutHours) { @@ -31,7 +31,7 @@ public OrphanFilesDeletionAnalyzer( } /** Package-private for tests that supply a pre-built {@link CadencePolicy}. */ - OrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) { + CadenceBasedOrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) { this.cadencePolicy = cadencePolicy; } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 50d426eef..9a847f34c 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -14,18 +14,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -class OrphanFilesDeletionAnalyzerTest { +class CadenceBasedOrphanFilesDeletionAnalyzerTest { private static final Duration SUCCESS_INTERVAL = Duration.ofHours(24); private static final Duration FAILURE_INTERVAL = Duration.ofHours(1); private static final Duration SCHEDULED_TIMEOUT = Duration.ofHours(6); - private OrphanFilesDeletionAnalyzer analyzer; + private CadenceBasedOrphanFilesDeletionAnalyzer analyzer; @BeforeEach void setUp() { analyzer = - new OrphanFilesDeletionAnalyzer( + new CadenceBasedOrphanFilesDeletionAnalyzer( new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL, SCHEDULED_TIMEOUT)); } @@ -215,7 +215,7 @@ private Table tableWithProperty(String value) { Map props = value == null ? Collections.emptyMap() - : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); + : Map.of(CadenceBasedOrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); return Table.builder() .tableUuid("test-uuid") .databaseName("db1") From f663537f5da02faac4aa3ef0f68ede6324a5971f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 08:57:27 -0700 Subject: [PATCH 10/19] docs(optimizer-analyzer): add scale roadmap as block comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture optimizations (a)–(m) discussed for scaling AnalyzerRunner past ~100k tables, the failure modes that trigger each, and which items have already landed. (d) — the table_operations_history index on (operation_type, table_uuid, completed_at) — landed on optimizer-1 and is noted inline. The remaining items stay queued. Also tighten the class javadoc: drop the misleading "safe at any realistic scale" wording and the two prior inline TODOs (now subsumed by the block comment), point readers at the roadmap. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 86 +++++++++++++++++-- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 22da50d01..e4084884f 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -18,19 +18,89 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; +/* + * Performance roadmap — read before scaling this runner past ~100k tables. + * + * Current shape: each pass loads all table_stats, all active table_operations + * per op type, and the latest history row per (table_uuid, op_type) per op + * type, then iterates (table × analyzer) in memory. Heap is O(tables) and + * CPU is O(tables × op_types) per pass. This works at ~100k tables / daily + * cadence with ~8 GB heap. It does NOT work at 1M tables / 15-min cadence: + * heap peaks past 10 GB during ops loading, the history correlated subquery + * does not complete without index (d) below, and per-pass wall time exceeds + * the cadence window. + * + * The optimizations below are roughly ordered by when they matter. Items + * (a)–(d) are hard prerequisites for scaling past ~100k tables; the rest + * unlock further headroom. Items already landed are noted inline. + * + * Schema / data-model prerequisites: + * (a) Denormalized per-operation enablement on table_stats (or a sibling + * table), kept current by commit hooks on table-property changes. Each + * op type has its own enable flag — this mirrors the existing + * enable/disable pattern for non-optimizer maintenance operations on + * table properties (e.g. maintenance.optimizer.ofd.enabled). Lets the + * analyzer filter opted-in (table, op_type) pairs at index level + * instead of parsing the tableProperties JSON for every row. 10–100× + * data reduction in early rollout where opt-in is selective per op. + * (b) Index on table_operations(table_uuid, operation_type, created_at) — + * drives the cooldown anti-join in (e)/(f). Lands with that query. + * (c) Index supporting per-op opt-in lookup on the denormalized structure + * from (a). Shape depends on (a)'s representation. Lands with (e)/(f). + * (d) Index on table_operations_history(operation_type, table_uuid, + * completed_at) — makes findLatestPerTable O(N log N) instead of + * O(N²). Without this the query does not complete at 1M-row scale. + * LANDED: idx_toph_optype_uuid_completed. + * + * Query shape: + * (e) findCandidatesByDatabase(db, opType, cooldown) repo method that + * pushes both the per-op opt-in filter (from (a)) and the cooldown + * predicate to SQL via NOT EXISTS. In-cooldown and opted-out tables + * produce zero rows; their op rows are never materialized in the + * application. Single biggest win on data transfer and heap. + * (f) Combined-op variant of (e): one query per db returns candidates + * across all op types in one shot. Cuts read QPS ~10× at the same + * data volume (20k queries/pass vs 200k). + * (g) Per-database iteration replacing the current global scan. Bounds + * working set to one db (1–10k tables) per pass instead of the full + * table count. + * (h) Replace the findLatestPerTable consumer with the (e)/(f) anti-join, + * encoding cadence policy (success-retry-interval, failure-retry- + * interval, scheduled-timeout) in SQL WHERE clauses. If this subsumes + * all cadence reads, historyRepo on this class becomes dead and can + * be removed; if some strategies still need Java-side cadence on + * history rows, keep it. + * + * Projection / IO: + * (i) Drop tableProperties from the stats query projection once (a) lands. + * The TEXT column carries multi-KB JSON per row; at 1M rows that's + * gigabytes of wire transfer per pass for data nobody reads. + * (j) High-volume repo reads must page or stream — Stream or + * paginated cursor, not List. JPA materializes the full list + * before stream-collect, which is the dominant heap-spike source. + * Treat this as a baseline requirement, not an optional optimization. + * (k) Batch PENDING INSERTs via saveAll(500–1000) and set + * hibernate.jdbc.batch_size. Per-row save() dominates the write phase + * at any meaningful candidate volume. + * + * Runtime / deployment: + * (l) Rate-limit queries per analyzer instance to bound MySQL load. + * The runner should pace its per-db iteration across the cadence + * window rather than racing — converts a 2k-QPS spike at the start + * of each pass into a flat ~20 QPS sustained. + * (m) Conditional, only if single-instance runtime exceeds the cadence + * window after (a)–(l): shard databases by hash key and run N + * analyzer instances in parallel. Concurrency is then controlled by + * deployment count, not in-process worker pools. + */ /** * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * *

Both sides of the join — current operations and latest history per (table, type) — are loaded - * into maps once per run before the table loop. Both are bounded by the number of tables, so - * holding them in memory is safe at any realistic scale. - * - *

// TODO: Iterate per-database instead of loading all tables at once. This scopes memory usage - * and allows incremental progress. - * - *

// TODO: Benchmark memory footprint at 10k tables per iteration to validate the in-memory join - * approach. + * into maps once per run before the table loop. This is correct at small scale (≤~100k tables) but + * breaks past that; see the performance roadmap block comment above for the queued optimizations + * and their triggering thresholds. */ @Slf4j @Component From d7e3a6559cbb91162707b15c4b4404f4c609e3c3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 10:25:56 -0700 Subject: [PATCH 11/19] docs(optimizer-analyzer): move scale roadmap to BDP-102182 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The (a)–(m) optimization roadmap was a ~75-line block comment on top of AnalyzerRunner. The content now lives in Jira BDP-102182 ("Optimizer analyzer: scale past 100k tables"), where it can be tracked, assigned, and broken into work without churning the source. Replace the block comment with a short class-javadoc pointer. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 83 ++----------------- 1 file changed, 5 insertions(+), 78 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index e4084884f..6f7c68d6a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -18,89 +18,16 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; -/* - * Performance roadmap — read before scaling this runner past ~100k tables. - * - * Current shape: each pass loads all table_stats, all active table_operations - * per op type, and the latest history row per (table_uuid, op_type) per op - * type, then iterates (table × analyzer) in memory. Heap is O(tables) and - * CPU is O(tables × op_types) per pass. This works at ~100k tables / daily - * cadence with ~8 GB heap. It does NOT work at 1M tables / 15-min cadence: - * heap peaks past 10 GB during ops loading, the history correlated subquery - * does not complete without index (d) below, and per-pass wall time exceeds - * the cadence window. - * - * The optimizations below are roughly ordered by when they matter. Items - * (a)–(d) are hard prerequisites for scaling past ~100k tables; the rest - * unlock further headroom. Items already landed are noted inline. - * - * Schema / data-model prerequisites: - * (a) Denormalized per-operation enablement on table_stats (or a sibling - * table), kept current by commit hooks on table-property changes. Each - * op type has its own enable flag — this mirrors the existing - * enable/disable pattern for non-optimizer maintenance operations on - * table properties (e.g. maintenance.optimizer.ofd.enabled). Lets the - * analyzer filter opted-in (table, op_type) pairs at index level - * instead of parsing the tableProperties JSON for every row. 10–100× - * data reduction in early rollout where opt-in is selective per op. - * (b) Index on table_operations(table_uuid, operation_type, created_at) — - * drives the cooldown anti-join in (e)/(f). Lands with that query. - * (c) Index supporting per-op opt-in lookup on the denormalized structure - * from (a). Shape depends on (a)'s representation. Lands with (e)/(f). - * (d) Index on table_operations_history(operation_type, table_uuid, - * completed_at) — makes findLatestPerTable O(N log N) instead of - * O(N²). Without this the query does not complete at 1M-row scale. - * LANDED: idx_toph_optype_uuid_completed. - * - * Query shape: - * (e) findCandidatesByDatabase(db, opType, cooldown) repo method that - * pushes both the per-op opt-in filter (from (a)) and the cooldown - * predicate to SQL via NOT EXISTS. In-cooldown and opted-out tables - * produce zero rows; their op rows are never materialized in the - * application. Single biggest win on data transfer and heap. - * (f) Combined-op variant of (e): one query per db returns candidates - * across all op types in one shot. Cuts read QPS ~10× at the same - * data volume (20k queries/pass vs 200k). - * (g) Per-database iteration replacing the current global scan. Bounds - * working set to one db (1–10k tables) per pass instead of the full - * table count. - * (h) Replace the findLatestPerTable consumer with the (e)/(f) anti-join, - * encoding cadence policy (success-retry-interval, failure-retry- - * interval, scheduled-timeout) in SQL WHERE clauses. If this subsumes - * all cadence reads, historyRepo on this class becomes dead and can - * be removed; if some strategies still need Java-side cadence on - * history rows, keep it. - * - * Projection / IO: - * (i) Drop tableProperties from the stats query projection once (a) lands. - * The TEXT column carries multi-KB JSON per row; at 1M rows that's - * gigabytes of wire transfer per pass for data nobody reads. - * (j) High-volume repo reads must page or stream — Stream or - * paginated cursor, not List. JPA materializes the full list - * before stream-collect, which is the dominant heap-spike source. - * Treat this as a baseline requirement, not an optional optimization. - * (k) Batch PENDING INSERTs via saveAll(500–1000) and set - * hibernate.jdbc.batch_size. Per-row save() dominates the write phase - * at any meaningful candidate volume. - * - * Runtime / deployment: - * (l) Rate-limit queries per analyzer instance to bound MySQL load. - * The runner should pace its per-db iteration across the cadence - * window rather than racing — converts a 2k-QPS spike at the start - * of each pass into a flat ~20 QPS sustained. - * (m) Conditional, only if single-instance runtime exceeds the cadence - * window after (a)–(l): shard databases by hash key and run N - * analyzer instances in parallel. Concurrency is then controlled by - * deployment count, not in-process worker pools. - */ /** * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * *

Both sides of the join — current operations and latest history per (table, type) — are loaded - * into maps once per run before the table loop. This is correct at small scale (≤~100k tables) but - * breaks past that; see the performance roadmap block comment above for the queued optimizations - * and their triggering thresholds. + * into maps once per run before the table loop. This is correct at small scale (≤~100k tables); + * past that the runner OOMs and exceeds the cadence window. Scale-up work (per-op enablement + * column, cooldown anti-join push-down, per-db iteration, streaming reads, batched writes, rate + * limiting) is tracked in BDP-102182. */ @Slf4j @Component From dd4faf2f769548e56541dbbc9abeff84e9a21af9 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:54:17 -0700 Subject: [PATCH 12/19] =?UTF-8?q?refactor(optimizer-analyzer):=20address?= =?UTF-8?q?=20PR=20review=20=E2=80=94=20required=20op,=20per-db,=20Optiona?= =?UTF-8?q?l,=20no=20switch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses unresolved review threads on #533: - HistoryStatus enum in analyzer-internal model. CadencePolicy parses it at the boundary; switch on String values is gone. - shouldSchedule restructured: an active non-CANCELED op short-circuits to false (the scheduler owns it). CANCELED and no-op cases defer to cadence on the latest history entry via a ternary; no switch on history status. - scheduledTimeout removed from CadencePolicy and the OFD analyzer config. Stuck-SCHEDULED recovery is a scheduler-side concern; the analyzer no longer inspects scheduledAt. - AnalyzerRunner.analyze now requires an OperationType per call. The per- analyzer inner loop is gone; the function processes one operation type at a time. AnalyzerApplication's CommandLineRunner iterates registered analyzers and calls analyze(op) per type. - AnalyzerRunner iterates databases — uses statsRepo.findDistinctDatabaseNames when no databaseName filter is supplied. Per-db query block factored into analyzeDatabase; working set is bounded by tables-per-db. - Optional filter params on analyze (databaseName/tableName/ tableUuid) and Optional unwrapping at the JPA call boundary. - TableOperation switched to Lombok @Builder; manual private build() factory removed; from() and pending() use the generated builder. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../analyzer/AnalyzerApplication.java | 10 +- .../openhouse/analyzer/AnalyzerRunner.java | 167 +++++++++--------- ...denceBasedOrphanFilesDeletionAnalyzer.java | 8 +- .../openhouse/analyzer/CadencePolicy.java | 70 ++------ .../analyzer/model/HistoryStatus.java | 13 ++ .../analyzer/model/TableOperation.java | 62 +++---- .../src/main/resources/application.properties | 1 - .../analyzer/AnalyzerRunnerTest.java | 58 +++--- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 85 +++------ 9 files changed, 195 insertions(+), 279 deletions(-) create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java index 99ba56047..edee9c02e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import java.util.List; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; @@ -17,9 +18,12 @@ public static void main(String[] args) { SpringApplication.run(AnalyzerApplication.class, args); } - /** Delegates to {@link AnalyzerRunner#analyze()} once per process invocation. */ + /** + * Runs the analyzer once per registered {@link OperationAnalyzer} per process invocation. Each + * call is scoped to one operation type; the runner iterates databases internally. + */ @Bean - public CommandLineRunner run(AnalyzerRunner runner) { - return args -> runner.analyze(); + public CommandLineRunner run(AnalyzerRunner runner, List analyzers) { + return args -> analyzers.forEach(a -> runner.analyze(a.getOperationType())); } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 6f7c68d6a..dfc605f50 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -19,15 +19,13 @@ import org.springframework.stereotype.Component; /** - * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every - * registered {@link OperationAnalyzer} in a single pass. + * Core analysis loop. For one operation type per call, iterates databases and evaluates each table + * in a database against the matching {@link OperationAnalyzer}. * *

Both sides of the join — current operations and latest history per (table, type) — are loaded - * into maps once per run before the table loop. This is correct at small scale (≤~100k tables); - * past that the runner OOMs and exceeds the cadence window. Scale-up work (per-op enablement - * column, cooldown anti-join push-down, per-db iteration, streaming reads, batched writes, rate - * limiting) is tracked in BDP-102182. + * into maps once per database before the table loop. This is correct at small scale (≤~100k + * tables); past that the per-db query shape and projection need further tuning. Scale-up work is + * tracked in BDP-102182. */ @Slf4j @Component @@ -39,100 +37,99 @@ public class AnalyzerRunner { private final TableOperationsRepository operationsRepo; private final TableOperationHistoryRepository historyRepo; - /** Run the full analysis loop once with no filters. */ - public void analyze() { - analyze(null, null, null, null); + /** + * Run the analysis loop for {@code operationType} across all databases, with no filters. + * Equivalent to {@link #analyze(OperationType, Optional, Optional, Optional)} with all-empty + * filters. + */ + public void analyze(OperationType operationType) { + analyze(operationType, Optional.empty(), Optional.empty(), Optional.empty()); } /** - * Run the analysis loop, optionally scoped to a specific operation type, database, table name, or - * table UUID. Pass {@code null} for any parameter to skip that filter. + * Run the analysis loop for the given operation type, optionally scoped to a single database, + * table name, or table UUID. Iterates databases one at a time so the working set is bounded by + * tables-per-db, not tables-total. */ public void analyze( - OperationType operationType, String databaseName, String tableName, String tableUuid) { - - List activeAnalyzers = - operationType == null - ? analyzers - : analyzers.stream() - .filter(a -> a.getOperationType() == operationType) - .collect(Collectors.toList()); - - // Pre-load the small sides of the joins — one query per analyzer type. - // TODO: Move to a query builder (Criteria API or jOOQ) as filter count grows. - Map> opsByType = - activeAnalyzers.stream() + OperationType operationType, + Optional databaseName, + Optional tableName, + Optional tableUuid) { + + Optional analyzerOpt = + analyzers.stream().filter(a -> a.getOperationType() == operationType).findFirst(); + if (analyzerOpt.isEmpty()) { + log.warn("No analyzer registered for operation type {}; skipping", operationType); + return; + } + OperationAnalyzer analyzer = analyzerOpt.get(); + + List dbs = databaseName.map(List::of).orElseGet(statsRepo::findDistinctDatabaseNames); + log.info("Analyzing {} across {} database(s)", operationType, dbs.size()); + + dbs.forEach(db -> analyzeDatabase(analyzer, db, tableName, tableUuid)); + + log.info("Analysis complete for {}", operationType); + } + + private void analyzeDatabase( + OperationAnalyzer analyzer, + String databaseName, + Optional tableName, + Optional tableUuid) { + + String operationType = analyzer.getOperationType().name(); + + // Pre-load the small sides of the joins — bounded by tables in this database. + Map currentOps = + operationsRepo + .find(operationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) + .stream() + .filter(e -> e.getTableUuid() != null) .collect( Collectors.toMap( - OperationAnalyzer::getOperationType, - a -> - operationsRepo - .find( - a.getOperationType().name(), - null, - tableUuid, - databaseName, - tableName) - .stream() - .filter(e -> e.getTableUuid() != null) - .collect( - Collectors.toMap( - TableOperationRow::getTableUuid, - TableOperation::from, - TableOperation::mostRecent)))); - - // Latest history row per (table_uuid, operation_type), one query per analyzer. The repo query - // may return tied rows for the same key on identical completed_at; dedupe in memory. - Map> latestHistoryByType = - activeAnalyzers.stream() + TableOperationRow::getTableUuid, + TableOperation::from, + TableOperation::mostRecent)); + + // Latest history row per (table_uuid, op_type) for this analyzer. The repo query may return + // tied rows on identical completed_at; dedupe in memory. + Map latestHistory = + historyRepo.findLatestPerTable(operationType).stream() + .filter(r -> r.getTableUuid() != null) .collect( Collectors.toMap( - OperationAnalyzer::getOperationType, - a -> - historyRepo.findLatestPerTable(a.getOperationType().name()).stream() - .filter(r -> r.getTableUuid() != null) - .collect( - Collectors.toMap( - TableOperationHistoryRow::getTableUuid, - r -> r, - AnalyzerRunner::moreRecentHistory)))); + TableOperationHistoryRow::getTableUuid, + r -> r, + AnalyzerRunner::moreRecentHistory)); List

tables = - statsRepo.find(databaseName, tableName, tableUuid).stream() + statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) .map(Table::from) .collect(Collectors.toList()); - log.info("Found {} tables in optimizer table_stats", tables.size()); tables.forEach( - table -> - activeAnalyzers.forEach( - analyzer -> { - if (!analyzer.isEnabled(table)) { - return; - } - - Optional currentOp = - Optional.ofNullable( - opsByType.get(analyzer.getOperationType()).get(table.getTableUuid())); - Optional latestHistory = - Optional.ofNullable( - latestHistoryByType - .get(analyzer.getOperationType()) - .get(table.getTableUuid())); - - if (analyzer.shouldSchedule(table, currentOp, latestHistory)) { - TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); - operationsRepo.save(op.toRow()); - log.info( - "Created PENDING {} operation for table {}.{}", - analyzer.getOperationType(), - table.getDatabaseName(), - table.getTableId()); - } - })); - - log.info("Analysis complete"); + table -> { + if (!analyzer.isEnabled(table)) { + return; + } + Optional currentOp = + Optional.ofNullable(currentOps.get(table.getTableUuid())); + Optional entry = + Optional.ofNullable(latestHistory.get(table.getTableUuid())); + + if (analyzer.shouldSchedule(table, currentOp, entry)) { + TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); + operationsRepo.save(op.toRow()); + log.info( + "Created PENDING {} operation for table {}.{}", + analyzer.getOperationType(), + table.getDatabaseName(), + table.getTableId()); + } + }); } private static TableOperationHistoryRow moreRecentHistory( diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index c50025b6a..e66bc070d 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -21,13 +21,9 @@ public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyze @Autowired public CadenceBasedOrphanFilesDeletionAnalyzer( @Value("${ofd.success-retry-hours:24}") long successRetryHours, - @Value("${ofd.failure-retry-hours:1}") long failureRetryHours, - @Value("${ofd.scheduled-timeout-hours:6}") long scheduledTimeoutHours) { + @Value("${ofd.failure-retry-hours:1}") long failureRetryHours) { this.cadencePolicy = - new CadencePolicy( - Duration.ofHours(successRetryHours), - Duration.ofHours(failureRetryHours), - Duration.ofHours(scheduledTimeoutHours)); + new CadencePolicy(Duration.ofHours(successRetryHours), Duration.ofHours(failureRetryHours)); } /** Package-private for tests that supply a pre-built {@link CadencePolicy}. */ diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 4cf892021..7aa646cf6 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.HistoryStatus; +import com.linkedin.openhouse.analyzer.model.OperationStatus; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; @@ -8,14 +10,13 @@ import lombok.RequiredArgsConstructor; /** - * Encapsulates the time-based scheduling logic shared across operation types. An analyzer delegates - * to {@link CadencePolicy} to decide whether to re-issue a recommendation for a table that already - * has an active operation record and/or history. + * Time-based scheduling policy. An analyzer delegates to {@link CadencePolicy} to decide whether to + * re-issue a recommendation for a table. * - *

The SCHEDULED timeout is a key safety mechanism: if a Spark job crashes without reporting - * back, the SCHEDULED row would otherwise block the table forever. When the row has been SCHEDULED - * (or SCHEDULING) longer than {@code scheduledTimeout}, the Analyzer treats it as stale and returns - * {@code true}, causing a new PENDING row to be inserted. + *

The analyzer stays out of any table that already has a non-CANCELED active operation — those + * belong to the scheduler. For tables with no active operation (or only a CANCELED one), the + * decision is based on the most recent completed-history entry: re-evaluate after {@code + * successRetryInterval} on success, or after {@code failureRetryInterval} on failure. */ @RequiredArgsConstructor public class CadencePolicy { @@ -34,14 +35,6 @@ public class CadencePolicy { */ private final Duration failureRetryInterval; - /** - * Maximum time a row can stay in SCHEDULED status before the analyzer treats it as stale and - * overwrites it with a new PENDING row. Handles the case where a Spark job crashes without - * reporting back. For example, if set to 6 hours and a job was submitted at noon but never - * completed, the analyzer will re-schedule the table after 6:00 PM. - */ - private final Duration scheduledTimeout; - /** * Returns {@code true} if a new or refreshed operation record should be upserted. * @@ -50,47 +43,16 @@ public class CadencePolicy { */ public boolean shouldSchedule( Optional currentOp, Optional latestHistory) { - if (currentOp.isEmpty()) { - return decideFromHistory(latestHistory); - } - TableOperation op = currentOp.get(); - switch (op.getStatus()) { - case PENDING: - case SCHEDULING: - return false; - case SCHEDULED: - // Two scenarios for a SCHEDULED row: - // - no history yet: the job is still running (or crashed); fall through to the - // scheduledTimeout safety net. - // - history present: the job completed and history was written; defer to cadence policy - // on the history entry. - return latestHistory.isPresent() - ? decideFromHistory(latestHistory) - : pastInterval(op.getScheduledAt(), scheduledTimeout); - case CANCELED: - return decideFromHistory(latestHistory); - default: - throw new IllegalStateException("Unhandled operation status: " + op.getStatus()); - } - } - - private boolean decideFromHistory(Optional latestHistory) { - if (latestHistory.isEmpty()) { - return true; - } - TableOperationHistoryRow entry = latestHistory.get(); - switch (entry.getStatus()) { - case "SUCCESS": - return pastInterval(entry.getCompletedAt(), successRetryInterval); - case "FAILED": - return pastInterval(entry.getCompletedAt(), failureRetryInterval); - default: - return true; + if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatus.CANCELED) { + return false; } + return latestHistory.map(this::readyAfterHistoryEntry).orElse(true); } - /** {@code true} if {@code timestamp} is null or {@code interval} has elapsed since then. */ - private boolean pastInterval(Instant timestamp, Duration interval) { - return timestamp == null || Duration.between(timestamp, Instant.now()).compareTo(interval) > 0; + private boolean readyAfterHistoryEntry(TableOperationHistoryRow entry) { + HistoryStatus status = HistoryStatus.valueOf(entry.getStatus()); + Duration interval = + status == HistoryStatus.FAILED ? failureRetryInterval : successRetryInterval; + return Duration.between(entry.getCompletedAt(), Instant.now()).compareTo(interval) > 0; } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java new file mode 100644 index 000000000..eb0e46762 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.analyzer.model; + +/** + * Analyzer-internal lifecycle outcomes for a completed operation. Mirrors the values written to + * {@code table_operations_history.status}; parsed at the boundary so the analyzer can switch on a + * typed value instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java index 54e569b6a..3de08b9c5 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -4,6 +4,8 @@ import java.time.Instant; import java.util.Comparator; import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; @@ -13,7 +15,9 @@ * new PENDING operation). Converts back to a JPA row via {@link #toRow()}. */ @Data +@Builder @NoArgsConstructor +@AllArgsConstructor public class TableOperation { /** Unique operation ID (UUID). */ @@ -42,28 +46,29 @@ public class TableOperation { /** Build a {@code TableOperation} from an existing JPA row. */ public static TableOperation from(TableOperationRow row) { - return build( - row.getId(), - row.getTableUuid(), - row.getDatabaseName(), - row.getTableName(), - OperationType.valueOf(row.getOperationType()), - OperationStatus.valueOf(row.getStatus()), - row.getCreatedAt(), - row.getScheduledAt()); + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); } /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { - return build( - UUID.randomUUID().toString(), - table.getTableUuid(), - table.getDatabaseName(), - table.getTableId(), - operationType, - OperationStatus.PENDING, - Instant.now(), - null); + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); } /** Convert to a JPA entity for persistence. */ @@ -87,25 +92,4 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } - - private static TableOperation build( - String id, - String tableUuid, - String databaseName, - String tableName, - OperationType operationType, - OperationStatus status, - Instant createdAt, - Instant scheduledAt) { - TableOperation op = new TableOperation(); - op.id = id; - op.tableUuid = tableUuid; - op.databaseName = databaseName; - op.tableName = tableName; - op.operationType = operationType; - op.status = status; - op.createdAt = createdAt; - op.scheduledAt = scheduledAt; - return op; - } } diff --git a/apps/optimizer-analyzer/src/main/resources/application.properties b/apps/optimizer-analyzer/src/main/resources/application.properties index 990740f1d..1df0bea15 100644 --- a/apps/optimizer-analyzer/src/main/resources/application.properties +++ b/apps/optimizer-analyzer/src/main/resources/application.properties @@ -6,4 +6,3 @@ spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} spring.jpa.hibernate.ddl-auto=none ofd.success-retry-hours=24 ofd.failure-retry-hours=1 -ofd.scheduled-timeout-hours=6 diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 1feafba29..9734a329a 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -29,7 +29,9 @@ @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { - private static final String OFD = OperationType.ORPHAN_FILES_DELETION.name(); + private static final OperationType OFD_TYPE = OperationType.ORPHAN_FILES_DELETION; + private static final String OFD = OFD_TYPE.name(); + private static final String DB = "db1"; @Mock private TableStatsRepository statsRepo; @Mock private TableOperationsRepository operationsRepo; @@ -41,33 +43,34 @@ class AnalyzerRunnerTest { @BeforeEach void setUp() { runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); + when(analyzer.getOperationType()).thenReturn(OFD_TYPE); + when(statsRepo.findDistinctDatabaseNames()).thenReturn(List.of(DB)); } @Test void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); + statsEntity.setDatabaseName(DB); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); - runner.analyze(); + runner.analyze(OFD_TYPE); ArgumentCaptor captor = ArgumentCaptor.forClass(TableOperationRow.class); verify(operationsRepo).save(captor.capture()); TableOperationRow saved = captor.getValue(); assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); - assertThat(saved.getDatabaseName()).isEqualTo("db1"); + assertThat(saved.getDatabaseName()).isEqualTo(DB); assertThat(saved.getTableName()).isEqualTo("tbl1"); assertThat(saved.getOperationType()).isEqualTo(OFD); assertThat(saved.getStatus()).isEqualTo("PENDING"); @@ -78,11 +81,11 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); + statsEntity.setDatabaseName(DB); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); TableOperationRow existingEntity = new TableOperationRow(); existingEntity.setId("existing-op-id"); @@ -91,9 +94,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { existingEntity.setOperationType(OFD); existingEntity.setCreatedAt(Instant.now()); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(existingEntity)); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(existingEntity)); when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -101,7 +103,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } @@ -110,16 +112,16 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseName(DB); - Table expectedTable = Table.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } @@ -128,8 +130,9 @@ void analyze_skipsTable_whenNotEnabled() { void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseName(DB); - Table expectedTable = Table.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); TableOperationRow scheduled = new TableOperationRow(); scheduled.setId("op-id"); @@ -138,9 +141,8 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { scheduled.setOperationType(OFD); scheduled.setCreatedAt(Instant.now()); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(scheduled)); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(scheduled)); when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -148,7 +150,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } @@ -157,13 +159,13 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { void analyze_skipsTable_whenTableUuidIsNull() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid(null); + statsEntity.setDatabaseName(DB); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 9a847f34c..771707258 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -18,7 +18,6 @@ class CadenceBasedOrphanFilesDeletionAnalyzerTest { private static final Duration SUCCESS_INTERVAL = Duration.ofHours(24); private static final Duration FAILURE_INTERVAL = Duration.ofHours(1); - private static final Duration SCHEDULED_TIMEOUT = Duration.ofHours(6); private CadenceBasedOrphanFilesDeletionAnalyzer analyzer; @@ -26,7 +25,7 @@ class CadenceBasedOrphanFilesDeletionAnalyzerTest { void setUp() { analyzer = new CadenceBasedOrphanFilesDeletionAnalyzer( - new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL, SCHEDULED_TIMEOUT)); + new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL)); } // --- isEnabled --- @@ -105,14 +104,14 @@ void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { .isFalse(); } - // --- shouldSchedule: PENDING / SCHEDULING --- + // --- shouldSchedule: active op (non-CANCELED) → analyzer stays out --- @Test void shouldSchedule_pending_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.PENDING, null)), + Optional.of(opWithStatus(OperationStatus.PENDING)), Optional.empty())) .isFalse(); } @@ -122,93 +121,56 @@ void shouldSchedule_scheduling_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULING, null)), + Optional.of(opWithStatus(OperationStatus.SCHEDULING)), Optional.empty())) .isFalse(); } - // --- shouldSchedule: SCHEDULED + history --- - @Test - void shouldSchedule_scheduledNoHistory_withinTimeout_returnsFalse() { - Instant recent = Instant.now().minus(SCHEDULED_TIMEOUT).plusSeconds(60); + void shouldSchedule_scheduled_returnsFalse_regardlessOfHistory() { + Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, recent)), - Optional.empty())) + Optional.of(opWithStatus(OperationStatus.SCHEDULED)), + Optional.of(historyWithStatus("SUCCESS", historyAt)))) .isFalse(); } - @Test - void shouldSchedule_scheduledNoHistory_pastTimeout_returnsTrue() { - Instant longAgo = Instant.now().minus(SCHEDULED_TIMEOUT).minusSeconds(60); - assertThat( - analyzer.shouldSchedule( - tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, longAgo)), - Optional.empty())) - .isTrue(); - } - - @Test - void shouldSchedule_scheduledWithNullScheduledAt_noHistory_returnsTrue() { - assertThat( - analyzer.shouldSchedule( - tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, null)), - Optional.empty())) - .isTrue(); - } + // --- shouldSchedule: CANCELED → cadence on history --- @Test - void shouldSchedule_scheduledWithSuccessHistory_afterCooldown_returnsTrue() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); + void shouldSchedule_canceled_successHistoryAfterCooldown_returnsTrue() { + Instant longAgo = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("SUCCESS", historyAt)))) + Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.of(historyWithStatus("SUCCESS", longAgo)))) .isTrue(); } @Test - void shouldSchedule_scheduledWithSuccessHistory_beforeCooldown_returnsFalse() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); + void shouldSchedule_canceled_successHistoryBeforeCooldown_returnsFalse() { + Instant recent = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("SUCCESS", historyAt)))) + Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.of(historyWithStatus("SUCCESS", recent)))) .isFalse(); } @Test - void shouldSchedule_scheduledWithFailedHistory_afterRetry_returnsTrue() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).minusSeconds(60); + void shouldSchedule_canceled_noHistory_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("FAILED", historyAt)))) + Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.empty())) .isTrue(); } - @Test - void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).plusSeconds(60); - assertThat( - analyzer.shouldSchedule( - tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("FAILED", historyAt)))) - .isFalse(); - } - // --- helpers --- private Table tableWithProperty(String value) { @@ -224,11 +186,8 @@ private Table tableWithProperty(String value) { .build(); } - private TableOperation opWithStatus(OperationStatus status, Instant scheduledAt) { - TableOperation op = new TableOperation(); - op.setStatus(status); - op.setScheduledAt(scheduledAt); - return op; + private TableOperation opWithStatus(OperationStatus status) { + return TableOperation.builder().status(status).build(); } private TableOperationHistoryRow historyWithStatus(String status, Instant completedAt) { From 91ba36241499e5c0791d3e8da2670cf1b2d29e41 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 13:21:39 -0700 Subject: [PATCH 13/19] style(optimizer-analyzer): tighten AnalyzerRunner.analyze body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the blank lines inside analyze() between statements (post-signature, post-dbs, post-forEach). Also remove the explanatory comment above latestHistory — the variable name is self-describing and the tied-rows edge case it warned about doesn't affect correctness here. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/linkedin/openhouse/analyzer/AnalyzerRunner.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index dfc605f50..1ab40b757 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -56,7 +56,6 @@ public void analyze( Optional databaseName, Optional tableName, Optional tableUuid) { - Optional analyzerOpt = analyzers.stream().filter(a -> a.getOperationType() == operationType).findFirst(); if (analyzerOpt.isEmpty()) { @@ -64,12 +63,9 @@ public void analyze( return; } OperationAnalyzer analyzer = analyzerOpt.get(); - List dbs = databaseName.map(List::of).orElseGet(statsRepo::findDistinctDatabaseNames); log.info("Analyzing {} across {} database(s)", operationType, dbs.size()); - dbs.forEach(db -> analyzeDatabase(analyzer, db, tableName, tableUuid)); - log.info("Analysis complete for {}", operationType); } @@ -93,8 +89,6 @@ private void analyzeDatabase( TableOperation::from, TableOperation::mostRecent)); - // Latest history row per (table_uuid, op_type) for this analyzer. The repo query may return - // tied rows on identical completed_at; dedupe in memory. Map latestHistory = historyRepo.findLatestPerTable(operationType).stream() .filter(r -> r.getTableUuid() != null) From 0dbe3d9a5c82b5be36158e2da7cd10b0ed22122f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:04:07 -0700 Subject: [PATCH 14/19] refactor(optimizer-analyzer): import shared model + explanatory comment Move imports from com.linkedin.openhouse.analyzer.model to the shared com.linkedin.openhouse.optimizer.model package. The five local copies (Table, TableOperation, OperationType, OperationStatus, HistoryStatus) are removed; tests and runtime classes import from the shared location. Adds a /* ... */ block comment above the per-table loop in AnalyzerRunner.analyzeDatabase walking through what each step does: opt-in check, current-op and latest-history lookup, delegation to shouldSchedule, and PENDING persistence on true. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 18 +++- ...denceBasedOrphanFilesDeletionAnalyzer.java | 6 +- .../openhouse/analyzer/CadencePolicy.java | 6 +- .../openhouse/analyzer/OperationAnalyzer.java | 6 +- .../analyzer/model/HistoryStatus.java | 13 --- .../analyzer/model/OperationStatus.java | 14 --- .../analyzer/model/OperationType.java | 10 -- .../openhouse/analyzer/model/Table.java | 42 -------- .../analyzer/model/TableOperation.java | 95 ------------------- .../analyzer/AnalyzerRunnerTest.java | 6 +- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 6 +- 11 files changed, 29 insertions(+), 193 deletions(-) delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 1ab40b757..ae865b11e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -104,6 +104,17 @@ private void analyzeDatabase( .map(Table::from) .collect(Collectors.toList()); + /* + * For each table in this database, decide whether to create a new PENDING operation. + * + * 1. Skip tables not opted in to this operation type. The opt-in check today reads a + * table-property flag; in the future it will read a denormalized column. + * 2. Look up the table's current active operation (if any) and its most recent completed + * history entry from the maps loaded above. + * 3. Delegate the schedule-or-not decision to the analyzer's shouldSchedule — strategy + * encapsulates cadence, retry policy, and any future per-operation signals. + * 4. On true, persist a new PENDING operation. The scheduler picks it up on its next pass. + */ tables.forEach( table -> { if (!analyzer.isEnabled(table)) { @@ -113,7 +124,6 @@ private void analyzeDatabase( Optional.ofNullable(currentOps.get(table.getTableUuid())); Optional entry = Optional.ofNullable(latestHistory.get(table.getTableUuid())); - if (analyzer.shouldSchedule(table, currentOp, entry)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); operationsRepo.save(op.toRow()); diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index e66bc070d..7f6a0b68b 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Duration; import java.util.Optional; import org.springframework.beans.factory.annotation.Autowired; diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 7aa646cf6..b95dadc5b 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.HistoryStatus; -import com.linkedin.openhouse.analyzer.model.OperationStatus; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Duration; import java.time.Instant; import java.util.Optional; diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 33f2b8e5d..b301f9d09 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.util.Optional; /** diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java deleted file mode 100644 index eb0e46762..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -/** - * Analyzer-internal lifecycle outcomes for a completed operation. Mirrors the values written to - * {@code table_operations_history.status}; parsed at the boundary so the analyzer can switch on a - * typed value instead of comparing strings. - * - *

Intentionally separate from the wire-API and DB representations. - */ -public enum HistoryStatus { - SUCCESS, - FAILED -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java deleted file mode 100644 index 8a2d1d541..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -/** - * Analyzer-internal lifecycle states. The analyzer only writes {@link #PENDING}; the other values - * are read off existing rows when deciding whether to re-issue a recommendation. - * - *

Intentionally separate from the wire-API and DB representations. - */ -public enum OperationStatus { - PENDING, - SCHEDULING, - SCHEDULED, - CANCELED -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java deleted file mode 100644 index da48bb459..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -/** - * Analyzer-internal enum for the operation types this app knows how to schedule. Intentionally - * separate from the wire-API and DB representations so the analyzer can evolve its set of supported - * operations without churning either boundary. - */ -public enum OperationType { - ORPHAN_FILES_DELETION -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java deleted file mode 100644 index 45e02fd60..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.TableStats; -import java.util.Collections; -import java.util.Map; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * An OpenHouse table enriched with stats and properties, built by combining data sources. This is - * the input to the analysis pipeline: analyzers evaluate a {@code Table} and decide whether to - * produce a {@link TableOperation}. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class Table { - - private String tableUuid; - private String databaseName; - private String tableId; - - @Builder.Default private Map tableProperties = Collections.emptyMap(); - - private TableStats stats; - - /** Build a {@code Table} from a {@code table_stats} row. */ - public static Table from(TableStatsRow row) { - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(row.getStats()) - .build(); - } -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java deleted file mode 100644 index 3de08b9c5..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ /dev/null @@ -1,95 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; -import java.time.Instant; -import java.util.Comparator; -import java.util.UUID; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * An operation the analyzer has decided to schedule for a table. Built either from an existing - * {@link TableOperationRow} (when loading current state) or from a {@link Table} (when creating a - * new PENDING operation). Converts back to a JPA row via {@link #toRow()}. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperation { - - /** Unique operation ID (UUID). */ - private String id; - - /** The table this operation targets. */ - private String tableUuid; - - /** Database name. */ - private String databaseName; - - /** Table name. */ - private String tableName; - - /** Operation type. */ - private OperationType operationType; - - /** Current lifecycle status. */ - private OperationStatus status; - - /** When this operation record was created. */ - private Instant createdAt; - - /** When the scheduler last submitted a job for this operation. */ - private Instant scheduledAt; - - /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationRow row) { - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(OperationType.valueOf(row.getOperationType())) - .status(OperationStatus.valueOf(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - - /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, OperationType operationType) { - return TableOperation.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(table.getTableUuid()) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .operationType(operationType) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); - } - - /** Convert to a JPA entity for persistence. */ - public TableOperationRow toRow() { - return TableOperationRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .operationType(operationType.name()) - .status(status.name()) - .createdAt(createdAt) - .scheduledAt(scheduledAt) - .version(0L) - .build(); - } - - /** Return the more recently created of two operations. */ - public static TableOperation mostRecent(TableOperation a, TableOperation b) { - Comparator byCreatedAt = - Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); - return byCreatedAt.compare(a, b) >= 0 ? a : b; - } -} diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 9734a329a..0d287fccf 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -7,11 +7,11 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 771707258..af7100357 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -2,10 +2,10 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.analyzer.model.OperationStatus; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Duration; import java.time.Instant; import java.util.Collections; From b0898e3553eab6d54403793e9cda27aefa4309c9 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:10:52 -0700 Subject: [PATCH 15/19] refactor(optimizer-analyzer): depend on :services:optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `:apps:optimizer-data` module was removed in the prior optimizer-2 forward commit. The analyzer's JPA + model classes now live in `:services:optimizer`; package FQNs are unchanged, so no Java imports need updating — only the gradle dep. --- apps/optimizer-analyzer/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/optimizer-analyzer/build.gradle b/apps/optimizer-analyzer/build.gradle index 84cc38857..f66ecc608 100644 --- a/apps/optimizer-analyzer/build.gradle +++ b/apps/optimizer-analyzer/build.gradle @@ -4,7 +4,7 @@ plugins { } dependencies { - implementation project(':apps:optimizer-data') + implementation project(':services:optimizer') implementation 'org.springframework.boot:spring-boot-starter:2.7.8' implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' From d7767e89330b7af78009cfe25bb44d1de7adc14a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:22:28 -0700 Subject: [PATCH 16/19] fix(optimizer-analyzer): rewrite AnalyzerRunnerTest to use entity builders TableStatsRow and TableOperationsRow had their constructors locked down (protected) and @Setter removed when the entity shape was realigned with optimizer-0. The analyzer tests still used `new Entity()` + setters, which no longer compiles. Switch to the builder pattern; behaviour-equivalent. --- .../analyzer/AnalyzerRunnerTest.java | 54 +++++++++---------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index a62be5622..6ff8739fa 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -49,10 +49,8 @@ void setUp() { @Test void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); - statsEntity.setTableName("tbl1"); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); @@ -79,20 +77,20 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { @Test void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); - statsEntity.setTableName("tbl1"); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); - TableOperationsRow existingEntity = new TableOperationsRow(); - existingEntity.setId("existing-op-id"); - existingEntity.setStatus("PENDING"); - existingEntity.setTableUuid("uuid-1"); - existingEntity.setOperationType(OFD); - existingEntity.setCreatedAt(Instant.now()); + TableOperationsRow existingEntity = + TableOperationsRow.builder() + .id("existing-op-id") + .status("PENDING") + .tableUuid("uuid-1") + .operationType(OFD) + .createdAt(Instant.now()) + .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(existingEntity)); @@ -110,9 +108,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { @Test void analyze_skipsTable_whenNotEnabled() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); @@ -128,18 +125,19 @@ void analyze_skipsTable_whenNotEnabled() { @Test void analyze_skipsTable_whenShouldScheduleReturnsFalse() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); - TableOperationsRow scheduled = new TableOperationsRow(); - scheduled.setId("op-id"); - scheduled.setStatus("SCHEDULED"); - scheduled.setTableUuid("uuid-1"); - scheduled.setOperationType(OFD); - scheduled.setCreatedAt(Instant.now()); + TableOperationsRow scheduled = + TableOperationsRow.builder() + .id("op-id") + .status("SCHEDULED") + .tableUuid("uuid-1") + .operationType(OFD) + .createdAt(Instant.now()) + .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(scheduled)); @@ -157,9 +155,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { @Test void analyze_skipsTable_whenTableUuidIsNull() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid(null); - statsEntity.setDatabaseName(DB); + TableStatsRow statsEntity = TableStatsRow.builder().databaseName(DB).build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); From ad11533166f07999d64d89ec61b96336b64d8fdd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:07:18 -0700 Subject: [PATCH 17/19] refactor(optimizer-analyzer): consume model/ types and ModelDbMapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt the analyzer to the entity→db rename and the removal of factory methods on model/ data types. The analyzer's strategy interface and policy now work in pure model/ types (TableOperationsHistory instead of TableOperationsHistoryRow); AnalyzerRunner uses ModelDbMapper to bridge db rows to those types. - OperationAnalyzer.shouldSchedule: parameter type changes from db.TableOperationsHistoryRow to model.TableOperationsHistory. - CadencePolicy: same parameter change; remove String→HistoryStatus parsing — the model carries it as a typed enum. - CadenceBasedOrphanFilesDeletionAnalyzer: signature update. - AnalyzerRunner: inject ModelDbMapper. Imports switch from entity/ to db/. Repository find / findLatestPerTable now take db.OperationType (translated via dbMapper.toDbOperationType). Row → model translation uses dbMapper.toTable / .toOperation / .toHistory. Persisting a new PENDING operation goes through dbMapper.toRow(op) instead of the removed TableOperation.toRow(). - Tests rewritten with the new types and pass a real ModelDbMapper into the runner-under-test. --- .../openhouse/analyzer/AnalyzerRunner.java | 37 ++++++------ ...denceBasedOrphanFilesDeletionAnalyzer.java | 4 +- .../openhouse/analyzer/CadencePolicy.java | 9 ++- .../openhouse/analyzer/OperationAnalyzer.java | 4 +- .../analyzer/AnalyzerRunnerTest.java | 60 ++++++++++--------- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 23 +++---- 6 files changed, 71 insertions(+), 66 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 7b729ab9c..0be4a5a34 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -36,6 +36,7 @@ public class AnalyzerRunner { private final TableStatsRepository statsRepo; private final TableOperationsRepository operationsRepo; private final TableOperationsHistoryRepository historyRepo; + private final ModelDbMapper dbMapper; /** * Run the analysis loop for {@code operationType} across all databases, with no filters. @@ -75,33 +76,35 @@ private void analyzeDatabase( Optional tableName, Optional tableUuid) { - String operationType = analyzer.getOperationType().name(); + com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = + dbMapper.toDbOperationType(analyzer.getOperationType()); // Pre-load the small sides of the joins — bounded by tables in this database. Map currentOps = operationsRepo - .find(operationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) + .find( + dbOperationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) .stream() .filter(e -> e.getTableUuid() != null) + .map(dbMapper::toOperation) .collect( Collectors.toMap( - TableOperationsRow::getTableUuid, - TableOperation::from, - TableOperation::mostRecent)); + TableOperation::getTableUuid, op -> op, TableOperation::mostRecent)); - Map latestHistory = - historyRepo.findLatestPerTable(operationType).stream() + Map latestHistory = + historyRepo.findLatestPerTable(dbOperationType).stream() .filter(r -> r.getTableUuid() != null) + .map(dbMapper::toHistory) .collect( Collectors.toMap( - TableOperationsHistoryRow::getTableUuid, - r -> r, + TableOperationsHistory::getTableUuid, + h -> h, AnalyzerRunner::moreRecentHistory)); List

tables = statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) - .map(Table::from) + .map(dbMapper::toTable) .collect(Collectors.toList()); /* @@ -122,11 +125,11 @@ private void analyzeDatabase( } Optional currentOp = Optional.ofNullable(currentOps.get(table.getTableUuid())); - Optional entry = + Optional entry = Optional.ofNullable(latestHistory.get(table.getTableUuid())); if (analyzer.shouldSchedule(table, currentOp, entry)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); - operationsRepo.save(op.toRow()); + operationsRepo.save(dbMapper.toRow(op)); log.info( "Created PENDING {} operation for table {}.{}", analyzer.getOperationType(), @@ -136,9 +139,9 @@ private void analyzeDatabase( }); } - private static TableOperationsHistoryRow moreRecentHistory( - TableOperationsHistoryRow a, TableOperationsHistoryRow b) { - Comparator byCompletedAt = + private static TableOperationsHistory moreRecentHistory( + TableOperationsHistory a, TableOperationsHistory b) { + Comparator byCompletedAt = Comparator.comparing(r -> r.getCompletedAt() != null ? r.getCompletedAt() : Instant.EPOCH); return byCompletedAt.compare(a, b) >= 0 ? a : b; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index 8c7aef286..394b77eca 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Duration; import java.util.Optional; import org.springframework.beans.factory.annotation.Autowired; @@ -45,7 +45,7 @@ public boolean isEnabled(Table table) { public boolean shouldSchedule( Table table, Optional currentOp, - Optional latestHistory) { + Optional latestHistory) { return cadencePolicy.shouldSchedule(currentOp, latestHistory); } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index bec541bfc..6ce2db80c 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Duration; import java.time.Instant; import java.util.Optional; @@ -42,17 +42,16 @@ public class CadencePolicy { * @param latestHistory the most recent history entry for this (table, type), or empty */ public boolean shouldSchedule( - Optional currentOp, Optional latestHistory) { + Optional currentOp, Optional latestHistory) { if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatus.CANCELED) { return false; } return latestHistory.map(this::readyAfterHistoryEntry).orElse(true); } - private boolean readyAfterHistoryEntry(TableOperationsHistoryRow entry) { - HistoryStatus status = HistoryStatus.valueOf(entry.getStatus()); + private boolean readyAfterHistoryEntry(TableOperationsHistory entry) { Duration interval = - status == HistoryStatus.FAILED ? failureRetryInterval : successRetryInterval; + entry.getStatus() == HistoryStatus.FAILED ? failureRetryInterval : successRetryInterval; return Duration.between(entry.getCompletedAt(), Instant.now()).compareTo(interval) > 0; } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index a7792c7ac..ba64f558a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.util.Optional; /** @@ -37,5 +37,5 @@ public interface OperationAnalyzer { boolean shouldSchedule( Table table, Optional currentOp, - Optional latestHistory); + Optional latestHistory); } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 6ff8739fa..fbd2fecbf 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -2,16 +2,16 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -30,7 +30,8 @@ class AnalyzerRunnerTest { private static final OperationType OFD_TYPE = OperationType.ORPHAN_FILES_DELETION; - private static final String OFD = OFD_TYPE.name(); + private static final com.linkedin.openhouse.optimizer.db.OperationType OFD_DB = + com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION; private static final String DB = "db1"; @Mock private TableStatsRepository statsRepo; @@ -38,11 +39,13 @@ class AnalyzerRunnerTest { @Mock private TableOperationsHistoryRepository historyRepo; @Mock private OperationAnalyzer analyzer; + private final ModelDbMapper dbMapper = new ModelDbMapper(); private AnalyzerRunner runner; @BeforeEach void setUp() { - runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); + runner = + new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo, dbMapper); when(analyzer.getOperationType()).thenReturn(OFD_TYPE); when(statsRepo.findDistinctDatabaseNames()).thenReturn(List.of(DB)); } @@ -52,12 +55,11 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); + Table expectedTable = dbMapper.toTable(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); @@ -70,8 +72,9 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); assertThat(saved.getDatabaseName()).isEqualTo(DB); assertThat(saved.getTableName()).isEqualTo("tbl1"); - assertThat(saved.getOperationType()).isEqualTo(OFD); - assertThat(saved.getStatus()).isEqualTo("PENDING"); + assertThat(saved.getOperationType()).isEqualTo(OFD_DB); + assertThat(saved.getStatus()) + .isEqualTo(com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING); assertThat(saved.getId()).isNotNull(); } @@ -80,24 +83,23 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); + Table expectedTable = dbMapper.toTable(statsEntity); TableOperationsRow existingEntity = TableOperationsRow.builder() .id("existing-op-id") - .status("PENDING") + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING) .tableUuid("uuid-1") - .operationType(OFD) + .operationType(OFD_DB) .createdAt(Instant.now()) .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(existingEntity)); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(List.of(existingEntity)); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation existingOp = TableOperation.from(existingEntity); + TableOperation existingOp = dbMapper.toOperation(existingEntity); when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); @@ -111,11 +113,11 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); + Table expectedTable = dbMapper.toTable(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); runner.analyze(OFD_TYPE); @@ -128,23 +130,23 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); + Table expectedTable = dbMapper.toTable(statsEntity); TableOperationsRow scheduled = TableOperationsRow.builder() .id("op-id") - .status("SCHEDULED") + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) .tableUuid("uuid-1") - .operationType(OFD) + .operationType(OFD_DB) .createdAt(Instant.now()) .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(scheduled)); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(List.of(scheduled)); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation scheduledOp = TableOperation.from(scheduled); + TableOperation scheduledOp = dbMapper.toOperation(scheduled); when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); @@ -158,8 +160,8 @@ void analyze_skipsTable_whenTableUuidIsNull() { TableStatsRow statsEntity = TableStatsRow.builder().databaseName(DB).build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(any())).thenReturn(Collections.emptyList()); runner.analyze(OFD_TYPE); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 30030a2fb..633c9dceb 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -2,10 +2,11 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Duration; import java.time.Instant; import java.util.Collections; @@ -67,7 +68,7 @@ void shouldSchedule_noOp_successHistoryAfterCooldown_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("SUCCESS", longAgo)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, longAgo)))) .isTrue(); } @@ -78,7 +79,7 @@ void shouldSchedule_noOp_successHistoryBeforeCooldown_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("SUCCESS", recent)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, recent)))) .isFalse(); } @@ -89,7 +90,7 @@ void shouldSchedule_noOp_failedHistoryAfterRetry_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("FAILED", longAgo)))) + Optional.of(historyWithStatus(HistoryStatus.FAILED, longAgo)))) .isTrue(); } @@ -100,7 +101,7 @@ void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("FAILED", recent)))) + Optional.of(historyWithStatus(HistoryStatus.FAILED, recent)))) .isFalse(); } @@ -133,7 +134,7 @@ void shouldSchedule_scheduled_returnsFalse_regardlessOfHistory() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.of(opWithStatus(OperationStatus.SCHEDULED)), - Optional.of(historyWithStatus("SUCCESS", historyAt)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, historyAt)))) .isFalse(); } @@ -146,7 +147,7 @@ void shouldSchedule_canceled_successHistoryAfterCooldown_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.of(opWithStatus(OperationStatus.CANCELED)), - Optional.of(historyWithStatus("SUCCESS", longAgo)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, longAgo)))) .isTrue(); } @@ -157,7 +158,7 @@ void shouldSchedule_canceled_successHistoryBeforeCooldown_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.of(opWithStatus(OperationStatus.CANCELED)), - Optional.of(historyWithStatus("SUCCESS", recent)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, recent)))) .isFalse(); } @@ -190,11 +191,11 @@ private TableOperation opWithStatus(OperationStatus status) { return TableOperation.builder().status(status).build(); } - private TableOperationsHistoryRow historyWithStatus(String status, Instant completedAt) { - return TableOperationsHistoryRow.builder() + private TableOperationsHistory historyWithStatus(HistoryStatus status, Instant completedAt) { + return TableOperationsHistory.builder() .id("hist-id") .tableUuid("test-uuid") - .operationType("ORPHAN_FILES_DELETION") + .operationType(com.linkedin.openhouse.optimizer.model.OperationType.ORPHAN_FILES_DELETION) .completedAt(completedAt) .status(status) .build(); From 95456bef1c35584dd2feebcf1e67e36421b53cac Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:10:37 -0700 Subject: [PATCH 18/19] refactor(optimizer-analyzer): use type to/from methods; drop ModelDbMapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AnalyzerRunner no longer injects ModelDbMapper. Conversion goes through the model types' static factories / instance methods: - Row → model: Table.fromRow, TableOperation.fromRow, TableOperationsHistory.fromRow. - Model → row: op.toRow(). - model.OperationType → db.OperationType via operationType.toDb(). Test: drop the ModelDbMapper field; use static factories directly. --- .../openhouse/analyzer/AnalyzerRunner.java | 12 +++++------- .../openhouse/analyzer/AnalyzerRunnerTest.java | 17 +++++++---------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 0be4a5a34..265b9d303 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -4,7 +4,6 @@ import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -36,7 +35,6 @@ public class AnalyzerRunner { private final TableStatsRepository statsRepo; private final TableOperationsRepository operationsRepo; private final TableOperationsHistoryRepository historyRepo; - private final ModelDbMapper dbMapper; /** * Run the analysis loop for {@code operationType} across all databases, with no filters. @@ -77,7 +75,7 @@ private void analyzeDatabase( Optional tableUuid) { com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = - dbMapper.toDbOperationType(analyzer.getOperationType()); + analyzer.getOperationType().toDb(); // Pre-load the small sides of the joins — bounded by tables in this database. Map currentOps = @@ -86,7 +84,7 @@ private void analyzeDatabase( dbOperationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) .stream() .filter(e -> e.getTableUuid() != null) - .map(dbMapper::toOperation) + .map(TableOperation::fromRow) .collect( Collectors.toMap( TableOperation::getTableUuid, op -> op, TableOperation::mostRecent)); @@ -94,7 +92,7 @@ private void analyzeDatabase( Map latestHistory = historyRepo.findLatestPerTable(dbOperationType).stream() .filter(r -> r.getTableUuid() != null) - .map(dbMapper::toHistory) + .map(TableOperationsHistory::fromRow) .collect( Collectors.toMap( TableOperationsHistory::getTableUuid, @@ -104,7 +102,7 @@ private void analyzeDatabase( List
tables = statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) - .map(dbMapper::toTable) + .map(Table::fromRow) .collect(Collectors.toList()); /* @@ -129,7 +127,7 @@ private void analyzeDatabase( Optional.ofNullable(latestHistory.get(table.getTableUuid())); if (analyzer.shouldSchedule(table, currentOp, entry)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); - operationsRepo.save(dbMapper.toRow(op)); + operationsRepo.save(op.toRow()); log.info( "Created PENDING {} operation for table {}.{}", analyzer.getOperationType(), diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index fbd2fecbf..fe9561eb9 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -11,7 +11,6 @@ import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -39,13 +38,11 @@ class AnalyzerRunnerTest { @Mock private TableOperationsHistoryRepository historyRepo; @Mock private OperationAnalyzer analyzer; - private final ModelDbMapper dbMapper = new ModelDbMapper(); private AnalyzerRunner runner; @BeforeEach void setUp() { - runner = - new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo, dbMapper); + runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); when(analyzer.getOperationType()).thenReturn(OFD_TYPE); when(statsRepo.findDistinctDatabaseNames()).thenReturn(List.of(DB)); } @@ -55,7 +52,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); @@ -83,7 +80,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); TableOperationsRow existingEntity = TableOperationsRow.builder() @@ -99,7 +96,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation existingOp = dbMapper.toOperation(existingEntity); + TableOperation existingOp = TableOperation.fromRow(existingEntity); when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); @@ -113,7 +110,7 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); @@ -130,7 +127,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); TableOperationsRow scheduled = TableOperationsRow.builder() @@ -146,7 +143,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation scheduledOp = dbMapper.toOperation(scheduled); + TableOperation scheduledOp = TableOperation.fromRow(scheduled); when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); From 7f5136017e19967d659c7283bc052181a039afa8 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:38:17 -0700 Subject: [PATCH 19/19] chore(analyzer): add TODOs for scale tests and query-builder migration Captures two open items from PR review on AnalyzerRunner: - scale test: empirically validate the 10k-tables-per-db working-set bound. - query-builder: migrate the optional-filter find(...) calls off raw JPQL to Criteria API or jOOQ once the scaffolding stabilizes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/linkedin/openhouse/analyzer/AnalyzerRunner.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 265b9d303..69b2f43d0 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -25,6 +25,10 @@ * into maps once per database before the table loop. This is correct at small scale (≤~100k * tables); past that the per-db query shape and projection need further tuning. Scale-up work is * tracked in BDP-102182. + * + *

// TODO(scale-test): benchmark the per-db working set at up to 10k tables and measure JVM heap + * residency for the three intermediate maps; per-db iteration bounds memory by tables-per-db rather + * than tables-total, but the upper bound still needs empirical validation. */ @Slf4j @Component @@ -78,6 +82,10 @@ private void analyzeDatabase( analyzer.getOperationType().toDb(); // Pre-load the small sides of the joins — bounded by tables in this database. + // TODO(query-builder): the JPQL optional-filter shape used by these find(...) calls gets + // unwieldy as the filter count grows. Migrate to Criteria API or jOOQ once the scaffolding + // stabilizes — applies to operationsRepo.find, historyRepo.findLatestPerTable, and + // statsRepo.find below. Map currentOps = operationsRepo .find(