From e1fbbfcf42674f062d3eb46bd4cbe5db3617a8e5 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 7 Apr 2026 09:34:36 -0700 Subject: [PATCH 01/19] =?UTF-8?q?feat(optimizer):=20add=20scheduler=20app?= =?UTF-8?q?=20=E2=80=94=20claims=20PENDING=20ops=20and=20submits=20Spark?= =?UTF-8?q?=20jobs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces apps/optimizer-scheduler, a Spring Boot CommandLineRunner that: 1. Loads PENDING operations from the optimizer DB 2. Bin-packs them by file count (first-fit descending) 3. Claims rows via CAS (PENDING → SCHEDULING → SCHEDULED) 4. Submits one batched Spark job per bin via the Jobs Service Adds three @Modifying CAS methods to TableOperationsRepository: cancelDuplicatePending, markScheduling, markScheduled — required for safe concurrent scheduling without double-submitting. Co-Authored-By: Claude Opus 4.6 --- apps/optimizer-scheduler/build.gradle | 19 ++ .../openhouse/scheduler/BinPacker.java | 74 +++++++ .../scheduler/SchedulerApplication.java | 24 +++ .../openhouse/scheduler/SchedulerRunner.java | 121 ++++++++++++ .../scheduler/client/JobsServiceClient.java | 80 ++++++++ .../scheduler/config/SchedulerConfig.java | 27 +++ .../src/main/resources/application.properties | 11 ++ .../openhouse/scheduler/BinPackerTest.java | 104 ++++++++++ .../scheduler/SchedulerRunnerTest.java | 186 ++++++++++++++++++ .../resources/application-test.properties | 11 ++ .../src/test/resources/schema.sql | 23 +++ .../repository/TableOperationsRepository.java | 44 +++++ settings.gradle | 1 + 13 files changed, 725 insertions(+) create mode 100644 apps/optimizer-scheduler/build.gradle create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/client/JobsServiceClient.java create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java create mode 100644 apps/optimizer-scheduler/src/main/resources/application.properties create mode 100644 apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java create mode 100644 apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java create mode 100644 apps/optimizer-scheduler/src/test/resources/application-test.properties create mode 100644 apps/optimizer-scheduler/src/test/resources/schema.sql diff --git a/apps/optimizer-scheduler/build.gradle b/apps/optimizer-scheduler/build.gradle new file mode 100644 index 000000000..a232ad28a --- /dev/null +++ b/apps/optimizer-scheduler/build.gradle @@ -0,0 +1,19 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation project(':apps:optimizer') + implementation 'org.springframework.boot:spring-boot-starter:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-aop:2.7.8' + runtimeOnly 'mysql:mysql-connector-java:8.0.33' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} + +test { + useJUnitPlatform() +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java new file mode 100644 index 000000000..240a50661 --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java @@ -0,0 +1,74 @@ +package com.linkedin.openhouse.scheduler; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Greedy first-fit descending bin-packer for table operations. + * + *

Tables are sorted by descending file count, then assigned to the first bin whose running total + * stays below {@code maxFilesPerBin}. A table larger than the limit gets its own bin (oversized + * bins are allowed — we never drop a table). + * + *

Tables with no stats entry are treated as file count = 0 and are still schedulable. + */ +public final class BinPacker { + + private BinPacker() {} + + /** + * Pack {@code pending} rows into bins. + * + * @param pending operations to pack; must not be empty + * @param fileCountByUuid map from tableUuid to number of current data files; missing entries + * default to 0 + * @param maxFilesPerBin maximum total file count per bin + * @return list of bins, each bin being a non-empty list of rows + */ + public static List> pack( + List pending, Map fileCountByUuid, long maxFilesPerBin) { + + if (pending.isEmpty()) { + return List.of(); + } + + List sorted = + pending.stream() + .sorted( + Comparator.comparingLong( + (TableOperationRow r) -> fileCountByUuid.getOrDefault(r.getTableUuid(), 0L)) + .reversed()) + .collect(Collectors.toList()); + + List> bins = new ArrayList<>(); + List binTotals = new ArrayList<>(); + + for (TableOperationRow row : sorted) { + long cost = fileCountByUuid.getOrDefault(row.getTableUuid(), 0L); + + int placed = -1; + for (int i = 0; i < bins.size(); i++) { + if (binTotals.get(i) + cost <= maxFilesPerBin || binTotals.get(i) == 0) { + placed = i; + break; + } + } + + if (placed >= 0) { + bins.get(placed).add(row); + binTotals.set(placed, binTotals.get(placed) + cost); + } else { + List newBin = new ArrayList<>(); + newBin.add(row); + bins.add(newBin); + binTotals.add(cost); + } + } + + return bins; + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java new file mode 100644 index 000000000..216ae9748 --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.scheduler; + +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.domain.EntityScan; +import org.springframework.context.annotation.Bean; +import org.springframework.data.jpa.repository.config.EnableJpaRepositories; + +/** Entry point for the Optimizer Scheduler application. */ +@SpringBootApplication +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") +public class SchedulerApplication { + + public static void main(String[] args) { + SpringApplication.run(SchedulerApplication.class, args); + } + + @Bean + public CommandLineRunner run(SchedulerRunner runner) { + return args -> runner.schedule(); + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java new file mode 100644 index 000000000..d6f3d404f --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -0,0 +1,121 @@ +package com.linkedin.openhouse.scheduler; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import com.linkedin.openhouse.scheduler.client.JobsServiceClient; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; +import org.springframework.transaction.annotation.Transactional; + +/** Reads PENDING rows from the optimizer DB, bin-packs them, and submits one Spark job per bin. */ +@Slf4j +@Component +@RequiredArgsConstructor +public class SchedulerRunner { + + private final TableOperationsRepository operationsRepo; + private final TableStatsRepository statsRepo; + private final JobsServiceClient jobsClient; + + @Value("${scheduler.bin-size-max-files}") + private long maxFiles; + + @Value("${scheduler.operation-type}") + private String operationType; + + @Value("${scheduler.results-endpoint}") + private String resultsEndpoint; + + @Transactional + public void schedule() { + List pending = + operationsRepo.find(operationType, "PENDING", null, null, null); + if (pending.isEmpty()) { + log.info("No PENDING operations of type {}; nothing to schedule", operationType); + return; + } + + Set uuids = + pending.stream().map(TableOperationRow::getTableUuid).collect(Collectors.toSet()); + + Map fileCountByUuid = + statsRepo.findAllById(uuids).stream() + .collect( + Collectors.toMap( + row -> row.getTableUuid(), + row -> { + if (row.getStats() == null || row.getStats().getSnapshot() == null) { + return 0L; + } + Long count = row.getStats().getSnapshot().getNumCurrentFiles(); + return count != null ? count : 0L; + })); + + List> bins = BinPacker.pack(pending, fileCountByUuid, maxFiles); + log.info( + "Packed {} PENDING operations into {} bins (maxFiles={})", + pending.size(), + bins.size(), + maxFiles); + + bins.forEach(this::submitBin); + } + + private void submitBin(List bin) { + // 0. Cancel duplicate PENDING rows per (tableUuid, operationType) — keep only the first row. + bin.stream() + .collect(Collectors.groupingBy(TableOperationRow::getTableUuid)) + .forEach( + (uuid, rows) -> { + TableOperationRow keep = rows.get(0); + operationsRepo.cancelDuplicatePending(uuid, operationType, keep.getId()); + }); + + // 1. Claim all rows (PENDING → SCHEDULING) — prevents a second scheduler instance from + // double-submitting. Any row already claimed (returns 0) is excluded from this batch. + List claimed = + bin.stream() + .filter( + r -> operationsRepo.markScheduling(r.getId(), r.getVersion(), Instant.now()) == 1) + .collect(Collectors.toList()); + + if (claimed.isEmpty()) { + log.info("All rows in bin already claimed by another scheduler instance; skipping"); + return; + } + + // 2. Submit a job for the claimed rows only. + List tableNames = + claimed.stream() + .map(r -> r.getDatabaseName() + "." + r.getTableName()) + .collect(Collectors.toList()); + List opIds = + claimed.stream().map(TableOperationRow::getId).collect(Collectors.toList()); + + String jobName = "batched-" + operationType.toLowerCase() + "-" + Instant.now().toEpochMilli(); + Optional jobId = + jobsClient.launch(jobName, operationType, tableNames, opIds, resultsEndpoint); + + if (jobId.isPresent()) { + // 3. Transition SCHEDULING → SCHEDULED with the jobId. + for (TableOperationRow r : claimed) { + operationsRepo.markScheduled(r.getId(), r.getVersion() + 1, jobId.get()); + } + log.info("Submitted job {} for {} tables", jobId.get(), claimed.size()); + } else { + log.warn( + "Job submission failed for {} SCHEDULING rows; the analyzer's scheduledTimeout will" + + " detect and overwrite stale rows", + claimed.size()); + } + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/client/JobsServiceClient.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/client/JobsServiceClient.java new file mode 100644 index 000000000..76a96f957 --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/client/JobsServiceClient.java @@ -0,0 +1,80 @@ +package com.linkedin.openhouse.scheduler.client; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.reactive.function.client.WebClient; + +/** + * Client for the OpenHouse Jobs Service. + * + *

Submits one {@code BatchedOrphanFilesDeletionSparkApp} job per bin via {@code POST /jobs}. + */ +@Slf4j +public class JobsServiceClient { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final Duration TIMEOUT = Duration.ofSeconds(30); + + private final WebClient webClient; + private final String clusterId; + + public JobsServiceClient(WebClient webClient, String clusterId) { + this.webClient = webClient; + this.clusterId = clusterId; + } + + /** + * Submit a batched Spark job for the given tables. + * + * @param jobName human-readable name for the job + * @param jobType operation type string (e.g. "ORPHAN_FILES_DELETION") + * @param tableNames fully-qualified table names (db.table) + * @param operationIds operation UUIDs — parallel to tableNames + * @param resultsEndpoint base URL the Spark app PATCHes results back to + * @return job ID if the submission succeeded, empty if an error occurred + */ + public Optional launch( + String jobName, + String jobType, + List tableNames, + List operationIds, + String resultsEndpoint) { + try { + ObjectNode body = MAPPER.createObjectNode(); + body.put("jobName", jobName); + body.put("clusterId", clusterId); + + ObjectNode jobConf = body.putObject("jobConf"); + jobConf.put("jobType", jobType); + + ArrayNode args = jobConf.putArray("args"); + args.add("--tableNames"); + args.add(String.join(",", tableNames)); + args.add("--operationIds"); + args.add(String.join(",", operationIds)); + args.add("--resultsEndpoint"); + args.add(resultsEndpoint); + + String responseBody = + webClient + .post() + .uri("/jobs") + .bodyValue(body) + .retrieve() + .bodyToMono(String.class) + .timeout(TIMEOUT) + .block(); + + String jobId = MAPPER.readTree(responseBody).path("jobId").asText(null); + return Optional.ofNullable(jobId); + } catch (Exception e) { + log.error("Failed to submit job '{}': {}", jobName, e.getMessage()); + return Optional.empty(); + } + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java new file mode 100644 index 000000000..afdc36240 --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java @@ -0,0 +1,27 @@ +package com.linkedin.openhouse.scheduler.config; + +import com.linkedin.openhouse.scheduler.client.JobsServiceClient; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.reactive.function.client.WebClient; + +@Configuration +public class SchedulerConfig { + + @Value("${jobs.base-uri}") + private String jobsBaseUri; + + @Value("${scheduler.cluster-id}") + private String clusterId; + + @Bean + public WebClient jobsWebClient() { + return WebClient.builder().baseUrl(jobsBaseUri).build(); + } + + @Bean + public JobsServiceClient jobsServiceClient(WebClient jobsWebClient) { + return new JobsServiceClient(jobsWebClient, clusterId); + } +} diff --git a/apps/optimizer-scheduler/src/main/resources/application.properties b/apps/optimizer-scheduler/src/main/resources/application.properties new file mode 100644 index 000000000..75074b0fe --- /dev/null +++ b/apps/optimizer-scheduler/src/main/resources/application.properties @@ -0,0 +1,11 @@ +spring.application.name=openhouse-optimizer-scheduler +spring.main.web-application-type=none +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:h2:mem:schedulerdb;DB_CLOSE_DELAY=-1;MODE=MySQL} +spring.datasource.username=${OPTIMIZER_DB_USER:sa} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} +spring.jpa.hibernate.ddl-auto=none +jobs.base-uri=${JOBS_BASE_URI:http://localhost:8002} +scheduler.bin-size-max-files=${SCHEDULER_BIN_SIZE_MAX_FILES:1000000} +scheduler.operation-type=${SCHEDULER_OPERATION_TYPE:ORPHAN_FILES_DELETION} +scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/table-operations} +scheduler.cluster-id=${SCHEDULER_CLUSTER_ID:LocalHadoopCluster} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java new file mode 100644 index 000000000..69ea4d3bc --- /dev/null +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java @@ -0,0 +1,104 @@ +package com.linkedin.openhouse.scheduler; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.Test; + +class BinPackerTest { + + private static TableOperationRow row(String uuid) { + return TableOperationRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid) + .databaseName("db") + .tableName("tbl_" + uuid) + .operationType("ORPHAN_FILES_DELETION") + .status("PENDING") + .build(); + } + + @Test + void emptyInput_returnsEmptyBins() { + assertThat(BinPacker.pack(List.of(), Map.of(), 1_000_000L)).isEmpty(); + } + + @Test + void singleTable_oneBin() { + TableOperationRow r = row("uuid-1"); + List> bins = + BinPacker.pack(List.of(r), Map.of("uuid-1", 100L), 1_000_000L); + + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).containsExactly(r); + } + + @Test + void tablesUnderLimit_oneBin() { + TableOperationRow r1 = row("a"); + TableOperationRow r2 = row("b"); + TableOperationRow r3 = row("c"); + + List> bins = + BinPacker.pack( + List.of(r1, r2, r3), Map.of("a", 300_000L, "b", 300_000L, "c", 300_000L), 1_000_000L); + + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).hasSize(3); + } + + @Test + void tablesOverLimit_twoBins() { + TableOperationRow r1 = row("a"); + TableOperationRow r2 = row("b"); + TableOperationRow r3 = row("c"); + + // 600k + 600k would exceed 1M; 400k fits after 600k + List> bins = + BinPacker.pack( + List.of(r1, r2, r3), Map.of("a", 600_000L, "b", 600_000L, "c", 400_000L), 1_000_000L); + + assertThat(bins).hasSize(2); + assertThat(bins.get(0)).hasSize(2); // 600k + 400k + assertThat(bins.get(1)).hasSize(1); // 600k alone + } + + @Test + void largeTableAlone_exceedsLimitSingleBin() { + TableOperationRow r = row("big"); + List> bins = + BinPacker.pack(List.of(r), Map.of("big", 5_000_000L), 1_000_000L); + + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).containsExactly(r); + } + + @Test + void noStats_fileCountZero_groupedNormally() { + TableOperationRow r1 = row("x"); + TableOperationRow r2 = row("y"); + + // No stats entries — both get cost 0, both fit in one bin + List> bins = BinPacker.pack(List.of(r1, r2), Map.of(), 1_000_000L); + + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).hasSize(2); + } + + @Test + void sortedDescending_largestTablesFirst() { + TableOperationRow small = row("small"); + TableOperationRow large = row("large"); + + List> bins = + BinPacker.pack(List.of(small, large), Map.of("small", 100L, "large", 900_000L), 1_000_000L); + + // Both fit in one bin, large should appear first due to descending sort + assertThat(bins).hasSize(1); + assertThat(bins.get(0).get(0).getTableUuid()).isEqualTo("large"); + assertThat(bins.get(0).get(1).getTableUuid()).isEqualTo("small"); + } +} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java new file mode 100644 index 000000000..bc3578572 --- /dev/null +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -0,0 +1,186 @@ +package com.linkedin.openhouse.scheduler; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import com.linkedin.openhouse.scheduler.client.JobsServiceClient; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.test.util.ReflectionTestUtils; + +@ExtendWith(MockitoExtension.class) +class SchedulerRunnerTest { + + @Mock private TableOperationsRepository operationsRepo; + @Mock private TableStatsRepository statsRepo; + @Mock private JobsServiceClient jobsClient; + + @InjectMocks private SchedulerRunner runner; + + @BeforeEach + void injectConfig() { + ReflectionTestUtils.setField(runner, "maxFiles", 1_000_000L); + ReflectionTestUtils.setField(runner, "operationType", "ORPHAN_FILES_DELETION"); + ReflectionTestUtils.setField( + runner, "resultsEndpoint", "http://localhost:8080/v1/table-operations"); + } + + private TableOperationRow pendingRow(String uuid, String db, String table) { + return TableOperationRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid) + .databaseName(db) + .tableName(table) + .operationType("ORPHAN_FILES_DELETION") + .status("PENDING") + .version(0L) + .build(); + } + + private TableStatsRow statsRow(String uuid, long numCurrentFiles) { + TableStats stats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(numCurrentFiles).build()) + .build(); + return TableStatsRow.builder().tableUuid(uuid).stats(stats).build(); + } + + @Test + void schedule_noPendingOps_noJobSubmitted() { + when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) + .thenReturn(List.of()); + + runner.schedule(); + + verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); + } + + @Test + void schedule_twoStepClaim_claimsAndSchedules() { + String uuid = UUID.randomUUID().toString(); + TableOperationRow row = pendingRow(uuid, "db1", "tbl1"); + + when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) + .thenReturn(List.of(row)); + when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); + when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); + when(operationsRepo.markScheduled(anyString(), anyLong(), anyString())).thenReturn(1); + when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) + .thenReturn(Optional.of("job-123")); + + runner.schedule(); + + // Step 1: PENDING → SCHEDULING + verify(operationsRepo).markScheduling(eq(row.getId()), eq(0L), any()); + // Step 2: SCHEDULING → SCHEDULED with jobId + verify(operationsRepo).markScheduled(eq(row.getId()), eq(1L), eq("job-123")); + + ArgumentCaptor> tableNamesCaptor = ArgumentCaptor.forClass(List.class); + ArgumentCaptor> opIdsCaptor = ArgumentCaptor.forClass(List.class); + verify(jobsClient) + .launch( + anyString(), + eq("ORPHAN_FILES_DELETION"), + tableNamesCaptor.capture(), + opIdsCaptor.capture(), + anyString()); + + assertThat(tableNamesCaptor.getValue()).containsExactly("db1.tbl1"); + assertThat(opIdsCaptor.getValue()).containsExactly(row.getId()); + } + + @Test + void schedule_jobLaunchFails_rowsStayScheduling() { + String uuid = UUID.randomUUID().toString(); + TableOperationRow row = pendingRow(uuid, "db1", "tbl1"); + + when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) + .thenReturn(List.of(row)); + when(statsRepo.findAllById(any())).thenReturn(List.of()); + when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); + when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) + .thenReturn(Optional.empty()); + + runner.schedule(); + + verify(operationsRepo).markScheduling(eq(row.getId()), eq(0L), any()); + verify(operationsRepo, never()).markScheduled(anyString(), anyLong(), anyString()); + } + + @Test + void schedule_rowAlreadyClaimed_skipsSubmit() { + String uuid = UUID.randomUUID().toString(); + TableOperationRow row = pendingRow(uuid, "db1", "tbl1"); + + when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) + .thenReturn(List.of(row)); + when(statsRepo.findAllById(any())).thenReturn(List.of()); + when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(0); + + runner.schedule(); + + verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); + } + + @Test + void schedule_cancelsDuplicatePendingBeforeClaim() { + String uuid = UUID.randomUUID().toString(); + TableOperationRow row1 = pendingRow(uuid, "db1", "tbl1"); + TableOperationRow row2 = pendingRow(uuid, "db1", "tbl1"); + + when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) + .thenReturn(List.of(row1, row2)); + when(statsRepo.findAllById(any())).thenReturn(List.of()); + when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); + when(operationsRepo.markScheduled(anyString(), anyLong(), anyString())).thenReturn(1); + when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) + .thenReturn(Optional.of("job-789")); + + runner.schedule(); + + verify(operationsRepo) + .cancelDuplicatePending(eq(uuid), eq("ORPHAN_FILES_DELETION"), eq(row1.getId())); + } + + @Test + void schedule_claimsAllRowsInBin() { + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + TableOperationRow row1 = pendingRow(uuid1, "db1", "tbl1"); + TableOperationRow row2 = pendingRow(uuid2, "db1", "tbl2"); + + when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) + .thenReturn(List.of(row1, row2)); + when(statsRepo.findAllById(any())).thenReturn(List.of()); + when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); + when(operationsRepo.markScheduled(anyString(), anyLong(), anyString())).thenReturn(1); + when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) + .thenReturn(Optional.of("job-456")); + + runner.schedule(); + + verify(operationsRepo, times(2)).markScheduling(anyString(), eq(0L), any()); + verify(operationsRepo, times(2)).markScheduled(anyString(), eq(1L), eq("job-456")); + } +} diff --git a/apps/optimizer-scheduler/src/test/resources/application-test.properties b/apps/optimizer-scheduler/src/test/resources/application-test.properties new file mode 100644 index 000000000..3dcec13da --- /dev/null +++ b/apps/optimizer-scheduler/src/test/resources/application-test.properties @@ -0,0 +1,11 @@ +spring.datasource.url=jdbc:h2:mem:schedulertestdb;DB_CLOSE_DELAY=-1;MODE=MySQL +spring.datasource.username=sa +spring.datasource.password= +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.sql.init.schema-locations=classpath:schema.sql +jobs.base-uri=http://localhost:9999 +scheduler.bin-size-max-files=1000000 +scheduler.operation-type=ORPHAN_FILES_DELETION +scheduler.results-endpoint=http://localhost:8080/v1/table-operations +scheduler.cluster-id=test-cluster diff --git a/apps/optimizer-scheduler/src/test/resources/schema.sql b/apps/optimizer-scheduler/src/test/resources/schema.sql new file mode 100644 index 000000000..75de24d3d --- /dev/null +++ b/apps/optimizer-scheduler/src/test/resources/schema.sql @@ -0,0 +1,23 @@ +CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 27424dfdc..0ba241b97 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,8 +1,10 @@ package com.linkedin.openhouse.optimizer.repository; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -26,4 +28,46 @@ List find( @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); + + /** + * Delete duplicate PENDING rows for the same (tableUuid, operationType), keeping only the + * specified row. Used by the Scheduler to deduplicate before claiming. + */ + @Modifying + @Query( + "DELETE FROM TableOperationRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND r.operationType = :operationType " + + "AND r.status = 'PENDING' " + + "AND r.id <> :keepId") + int cancelDuplicatePending( + @Param("tableUuid") String tableUuid, + @Param("operationType") String operationType, + @Param("keepId") String keepId); + + /** + * CAS transition: PENDING → SCHEDULING. Returns 1 if the row was claimed, 0 if already claimed by + * another instance or the version has changed. + */ + @Modifying + @Query( + "UPDATE TableOperationRow r " + + "SET r.status = 'SCHEDULING', r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + "WHERE r.id = :id AND r.version = :version AND r.status = 'PENDING'") + int markScheduling( + @Param("id") String id, + @Param("version") long version, + @Param("scheduledAt") Instant scheduledAt); + + /** + * CAS transition: SCHEDULING → SCHEDULED with the external job ID. Returns 1 on success, 0 if the + * row is no longer in SCHEDULING state at the expected version. + */ + @Modifying + @Query( + "UPDATE TableOperationRow r " + + "SET r.status = 'SCHEDULED', r.jobId = :jobId, r.version = r.version + 1 " + + "WHERE r.id = :id AND r.version = :version AND r.status = 'SCHEDULING'") + int markScheduled( + @Param("id") String id, @Param("version") long version, @Param("jobId") String jobId); } diff --git a/settings.gradle b/settings.gradle index 52873b677..48c81f458 100644 --- a/settings.gradle +++ b/settings.gradle @@ -52,6 +52,7 @@ include ':services:jobs' include ':services:optimizer' include ':apps:optimizer' include ':apps:optimizer-analyzer' +include ':apps:optimizer-scheduler' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From ded97d06c3006e5790159df2c146c136e6e3155e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:09:57 -0700 Subject: [PATCH 02/19] refactor(optimizer-scheduler): per-op BinPacker interface, TableOperation domain, batched writes, retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses unresolved review threads on #534: - BinPacker becomes an interface with a single pack(...) method. Strategy is decoupled from operation type; binding to OFD lives in SchedulerConfig as a Map bean. - FileCountBinPacker is the first strategy — named after the dimension it packs on, not the op it serves. maxFilesPerBin is constructor-injected via scheduler.ofd.max-files-per-bin; pack(...) reads fileCount off TableOperation rather than taking a side map. - SchedulerRunner.schedule now takes required OperationType plus optional databaseName/tableName (mirror of AnalyzerRunner.analyze). It loads PENDING rows, converts row → TableOperation, fetches stats for fileCount enrichment, and dispatches to the registered BinPacker. No raw TableOperationRow flows through the per-bin logic. - Writes are batched: cancelDuplicatePendingBatch, markSchedulingBatch, markScheduledBatch, markPendingBatch on TableOperationsRepository each issue one UPDATE/DELETE. - On job-launch failure the scheduler reverts claimed rows from SCHEDULING back to PENDING via markPendingBatch so the next pass retries (replaces the previous log-and-leak that relied on the analyzer's removed scheduledTimeout). - SchedulerApplication's CommandLineRunner loops over the configured Map and calls schedule(opType) per entry. Tests: FileCountBinPackerTest covers the packing strategy; SchedulerRunnerTest covers per-op dispatch, batched claim/mark-scheduled on success, batch-revert on submit failure, duplicate-PENDING dedup, and the unknown-operation-type no-op path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/scheduler/BinPacker.java | 74 ++------- .../scheduler/FileCountBinPacker.java | 63 ++++++++ .../scheduler/SchedulerApplication.java | 10 +- .../openhouse/scheduler/SchedulerRunner.java | 146 ++++++++++-------- .../scheduler/config/SchedulerConfig.java | 17 ++ .../src/main/resources/application.properties | 3 +- .../openhouse/scheduler/BinPackerTest.java | 104 ------------- .../scheduler/FileCountBinPackerTest.java | 87 +++++++++++ .../scheduler/SchedulerRunnerTest.java | 140 ++++++++--------- .../resources/application-test.properties | 3 +- .../repository/TableOperationsRepository.java | 49 ++++++ 11 files changed, 387 insertions(+), 309 deletions(-) create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java delete mode 100644 apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java create mode 100644 apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java index 240a50661..91e57305d 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java @@ -1,74 +1,20 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; -import java.util.ArrayList; -import java.util.Comparator; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; /** - * Greedy first-fit descending bin-packer for table operations. - * - *

Tables are sorted by descending file count, then assigned to the first bin whose running total - * stays below {@code maxFilesPerBin}. A table larger than the limit gets its own bin (oversized - * bins are allowed — we never drop a table). - * - *

Tables with no stats entry are treated as file count = 0 and are still schedulable. + * Strategy for packing a set of operations into bins for batched job submission. Implementations + * encode the constraints of a particular packing dimension (file count, partition count, etc.); + * binding to an operation type is the responsibility of the scheduler configuration, not the + * strategy class. */ -public final class BinPacker { - - private BinPacker() {} +public interface BinPacker { /** - * Pack {@code pending} rows into bins. - * - * @param pending operations to pack; must not be empty - * @param fileCountByUuid map from tableUuid to number of current data files; missing entries - * default to 0 - * @param maxFilesPerBin maximum total file count per bin - * @return list of bins, each bin being a non-empty list of rows + * Pack {@code pending} into one or more bins. Each returned bin is a non-empty list of + * operations. The relative order of operations within a bin is implementation-defined; the + * returned list of bins is what the scheduler dispatches one job per. */ - public static List> pack( - List pending, Map fileCountByUuid, long maxFilesPerBin) { - - if (pending.isEmpty()) { - return List.of(); - } - - List sorted = - pending.stream() - .sorted( - Comparator.comparingLong( - (TableOperationRow r) -> fileCountByUuid.getOrDefault(r.getTableUuid(), 0L)) - .reversed()) - .collect(Collectors.toList()); - - List> bins = new ArrayList<>(); - List binTotals = new ArrayList<>(); - - for (TableOperationRow row : sorted) { - long cost = fileCountByUuid.getOrDefault(row.getTableUuid(), 0L); - - int placed = -1; - for (int i = 0; i < bins.size(); i++) { - if (binTotals.get(i) + cost <= maxFilesPerBin || binTotals.get(i) == 0) { - placed = i; - break; - } - } - - if (placed >= 0) { - bins.get(placed).add(row); - binTotals.set(placed, binTotals.get(placed) + cost); - } else { - List newBin = new ArrayList<>(); - newBin.add(row); - bins.add(newBin); - binTotals.add(cost); - } - } - - return bins; - } + List> pack(List pending); } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java new file mode 100644 index 000000000..64e14dc9f --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -0,0 +1,63 @@ +package com.linkedin.openhouse.scheduler; + +import com.linkedin.openhouse.optimizer.model.TableOperation; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; + +/** + * Greedy first-fit-descending bin-packer keyed on {@link TableOperation#getFileCount()}. + * + *

Operations are sorted by descending file count, then assigned to the first bin whose running + * total stays at or below {@code maxFilesPerBin}. An operation larger than the limit gets its own + * bin (oversized bins are allowed — we never drop an operation). + * + *

Operations with no file-count populated are treated as zero. + */ +@RequiredArgsConstructor +public class FileCountBinPacker implements BinPacker { + + private final long maxFilesPerBin; + + @Override + public List> pack(List pending) { + if (pending.isEmpty()) { + return List.of(); + } + + List sorted = + pending.stream() + .sorted(Comparator.comparingLong(FileCountBinPacker::cost).reversed()) + .collect(Collectors.toList()); + + List> bins = new ArrayList<>(); + List binTotals = new ArrayList<>(); + + for (TableOperation op : sorted) { + long cost = cost(op); + int placed = -1; + for (int i = 0; i < bins.size(); i++) { + if (binTotals.get(i) + cost <= maxFilesPerBin || binTotals.get(i) == 0) { + placed = i; + break; + } + } + if (placed >= 0) { + bins.get(placed).add(op); + binTotals.set(placed, binTotals.get(placed) + cost); + } else { + List newBin = new ArrayList<>(); + newBin.add(op); + bins.add(newBin); + binTotals.add(cost); + } + } + return bins; + } + + private static long cost(TableOperation op) { + return op.getFileCount() != null ? op.getFileCount() : 0L; + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java index 216ae9748..b28d1b73c 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.scheduler; +import com.linkedin.openhouse.optimizer.model.OperationType; +import java.util.Map; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; @@ -17,8 +19,12 @@ public static void main(String[] args) { SpringApplication.run(SchedulerApplication.class, args); } + /** + * Runs the scheduler once per registered {@link BinPacker} per process invocation. Each call is + * scoped to one operation type. + */ @Bean - public CommandLineRunner run(SchedulerRunner runner) { - return args -> runner.schedule(); + public CommandLineRunner run(SchedulerRunner runner, Map binPackers) { + return args -> binPackers.keySet().forEach(runner::schedule); } } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index d6f3d404f..72817889b 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -1,6 +1,9 @@ package com.linkedin.openhouse.scheduler; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -16,7 +19,12 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; -/** Reads PENDING rows from the optimizer DB, bin-packs them, and submits one Spark job per bin. */ +/** + * For one operation type per call, reads PENDING rows, enriches them with file count, dispatches to + * the registered {@link BinPacker}, and submits one Spark job per bin. The {@link + * com.linkedin.openhouse.scheduler.SchedulerApplication}'s CommandLineRunner loops over the + * registered packers and invokes {@code schedule(opType)} for each. + */ @Slf4j @Component @RequiredArgsConstructor @@ -25,97 +33,109 @@ public class SchedulerRunner { private final TableOperationsRepository operationsRepo; private final TableStatsRepository statsRepo; private final JobsServiceClient jobsClient; - - @Value("${scheduler.bin-size-max-files}") - private long maxFiles; - - @Value("${scheduler.operation-type}") - private String operationType; + private final Map binPackers; @Value("${scheduler.results-endpoint}") private String resultsEndpoint; + /** Schedule all PENDING operations of the given type across all databases. */ + public void schedule(OperationType operationType) { + schedule(operationType, Optional.empty(), Optional.empty()); + } + + /** + * Schedule PENDING operations for {@code operationType}, optionally scoped to a single database + * or table name. + */ @Transactional - public void schedule() { - List pending = - operationsRepo.find(operationType, "PENDING", null, null, null); - if (pending.isEmpty()) { + public void schedule( + OperationType operationType, Optional databaseName, Optional tableName) { + + BinPacker packer = binPackers.get(operationType); + if (packer == null) { + log.warn("No BinPacker registered for operation type {}; skipping", operationType); + return; + } + + List pendingRows = + operationsRepo.find( + operationType.name(), + "PENDING", + null, + databaseName.orElse(null), + tableName.orElse(null)); + if (pendingRows.isEmpty()) { log.info("No PENDING operations of type {}; nothing to schedule", operationType); return; } Set uuids = - pending.stream().map(TableOperationRow::getTableUuid).collect(Collectors.toSet()); - + pendingRows.stream().map(TableOperationRow::getTableUuid).collect(Collectors.toSet()); Map fileCountByUuid = statsRepo.findAllById(uuids).stream() .collect( - Collectors.toMap( - row -> row.getTableUuid(), - row -> { - if (row.getStats() == null || row.getStats().getSnapshot() == null) { - return 0L; - } - Long count = row.getStats().getSnapshot().getNumCurrentFiles(); - return count != null ? count : 0L; - })); - - List> bins = BinPacker.pack(pending, fileCountByUuid, maxFiles); + Collectors.toMap(TableStatsRow::getTableUuid, SchedulerRunner::extractFileCount)); + + List pending = + pendingRows.stream() + .map( + row -> { + TableOperation op = TableOperation.from(row); + op.setFileCount(fileCountByUuid.getOrDefault(row.getTableUuid(), 0L)); + return op; + }) + .collect(Collectors.toList()); + + List> bins = packer.pack(pending); log.info( - "Packed {} PENDING operations into {} bins (maxFiles={})", - pending.size(), - bins.size(), - maxFiles); + "Packed {} PENDING {} operations into {} bins", pending.size(), operationType, bins.size()); - bins.forEach(this::submitBin); + bins.forEach(bin -> submitBin(operationType, bin)); } - private void submitBin(List bin) { - // 0. Cancel duplicate PENDING rows per (tableUuid, operationType) — keep only the first row. - bin.stream() - .collect(Collectors.groupingBy(TableOperationRow::getTableUuid)) - .forEach( - (uuid, rows) -> { - TableOperationRow keep = rows.get(0); - operationsRepo.cancelDuplicatePending(uuid, operationType, keep.getId()); - }); - - // 1. Claim all rows (PENDING → SCHEDULING) — prevents a second scheduler instance from - // double-submitting. Any row already claimed (returns 0) is excluded from this batch. - List claimed = - bin.stream() - .filter( - r -> operationsRepo.markScheduling(r.getId(), r.getVersion(), Instant.now()) == 1) - .collect(Collectors.toList()); + private void submitBin(OperationType operationType, List bin) { + // Deduplicate PENDING rows per tableUuid for this op type, keeping the IDs in this bin. + List keepIds = bin.stream().map(TableOperation::getId).collect(Collectors.toList()); + operationsRepo.cancelDuplicatePendingBatch(operationType.name(), keepIds); - if (claimed.isEmpty()) { + // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. + int claimedCount = operationsRepo.markSchedulingBatch(keepIds, Instant.now()); + if (claimedCount == 0) { log.info("All rows in bin already claimed by another scheduler instance; skipping"); return; } - // 2. Submit a job for the claimed rows only. + // Submit the job for the rows we just claimed. List tableNames = - claimed.stream() - .map(r -> r.getDatabaseName() + "." + r.getTableName()) + bin.stream() + .map(op -> op.getDatabaseName() + "." + op.getTableName()) .collect(Collectors.toList()); - List opIds = - claimed.stream().map(TableOperationRow::getId).collect(Collectors.toList()); - - String jobName = "batched-" + operationType.toLowerCase() + "-" + Instant.now().toEpochMilli(); + String jobName = + "batched-" + operationType.name().toLowerCase() + "-" + Instant.now().toEpochMilli(); Optional jobId = - jobsClient.launch(jobName, operationType, tableNames, opIds, resultsEndpoint); + jobsClient.launch(jobName, operationType.name(), tableNames, keepIds, resultsEndpoint); if (jobId.isPresent()) { - // 3. Transition SCHEDULING → SCHEDULED with the jobId. - for (TableOperationRow r : claimed) { - operationsRepo.markScheduled(r.getId(), r.getVersion() + 1, jobId.get()); - } - log.info("Submitted job {} for {} tables", jobId.get(), claimed.size()); + int updated = operationsRepo.markScheduledBatch(keepIds, jobId.get()); + log.info( + "Submitted job {} for {} tables ({} rows marked SCHEDULED)", + jobId.get(), + tableNames.size(), + updated); } else { + int reverted = operationsRepo.markPendingBatch(keepIds); log.warn( - "Job submission failed for {} SCHEDULING rows; the analyzer's scheduledTimeout will" - + " detect and overwrite stale rows", - claimed.size()); + "Job submission failed; reverted {} claimed rows back to PENDING for retry on the next" + + " pass", + reverted); + } + } + + private static long extractFileCount(TableStatsRow row) { + if (row.getStats() == null || row.getStats().getSnapshot() == null) { + return 0L; } + Long count = row.getStats().getSnapshot().getNumCurrentFiles(); + return count != null ? count : 0L; } } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java index afdc36240..6ee16b42a 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java @@ -1,6 +1,10 @@ package com.linkedin.openhouse.scheduler.config; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.scheduler.BinPacker; +import com.linkedin.openhouse.scheduler.FileCountBinPacker; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; +import java.util.Map; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -15,6 +19,9 @@ public class SchedulerConfig { @Value("${scheduler.cluster-id}") private String clusterId; + @Value("${scheduler.ofd.max-files-per-bin}") + private long ofdMaxFilesPerBin; + @Bean public WebClient jobsWebClient() { return WebClient.builder().baseUrl(jobsBaseUri).build(); @@ -24,4 +31,14 @@ public WebClient jobsWebClient() { public JobsServiceClient jobsServiceClient(WebClient jobsWebClient) { return new JobsServiceClient(jobsWebClient, clusterId); } + + /** + * Map of {@link OperationType} to the {@link BinPacker} strategy that handles it. Adding a new + * operation type means adding an entry here and configuring its packer; the strategy class itself + * stays generic. + */ + @Bean + public Map binPackers() { + return Map.of(OperationType.ORPHAN_FILES_DELETION, new FileCountBinPacker(ofdMaxFilesPerBin)); + } } diff --git a/apps/optimizer-scheduler/src/main/resources/application.properties b/apps/optimizer-scheduler/src/main/resources/application.properties index 75074b0fe..4d849797c 100644 --- a/apps/optimizer-scheduler/src/main/resources/application.properties +++ b/apps/optimizer-scheduler/src/main/resources/application.properties @@ -5,7 +5,6 @@ spring.datasource.username=${OPTIMIZER_DB_USER:sa} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} spring.jpa.hibernate.ddl-auto=none jobs.base-uri=${JOBS_BASE_URI:http://localhost:8002} -scheduler.bin-size-max-files=${SCHEDULER_BIN_SIZE_MAX_FILES:1000000} -scheduler.operation-type=${SCHEDULER_OPERATION_TYPE:ORPHAN_FILES_DELETION} +scheduler.ofd.max-files-per-bin=${SCHEDULER_OFD_MAX_FILES_PER_BIN:1000000} scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/table-operations} scheduler.cluster-id=${SCHEDULER_CLUSTER_ID:LocalHadoopCluster} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java deleted file mode 100644 index 69ea4d3bc..000000000 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/BinPackerTest.java +++ /dev/null @@ -1,104 +0,0 @@ -package com.linkedin.openhouse.scheduler; - -import static org.assertj.core.api.Assertions.assertThat; - -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import org.junit.jupiter.api.Test; - -class BinPackerTest { - - private static TableOperationRow row(String uuid) { - return TableOperationRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(uuid) - .databaseName("db") - .tableName("tbl_" + uuid) - .operationType("ORPHAN_FILES_DELETION") - .status("PENDING") - .build(); - } - - @Test - void emptyInput_returnsEmptyBins() { - assertThat(BinPacker.pack(List.of(), Map.of(), 1_000_000L)).isEmpty(); - } - - @Test - void singleTable_oneBin() { - TableOperationRow r = row("uuid-1"); - List> bins = - BinPacker.pack(List.of(r), Map.of("uuid-1", 100L), 1_000_000L); - - assertThat(bins).hasSize(1); - assertThat(bins.get(0)).containsExactly(r); - } - - @Test - void tablesUnderLimit_oneBin() { - TableOperationRow r1 = row("a"); - TableOperationRow r2 = row("b"); - TableOperationRow r3 = row("c"); - - List> bins = - BinPacker.pack( - List.of(r1, r2, r3), Map.of("a", 300_000L, "b", 300_000L, "c", 300_000L), 1_000_000L); - - assertThat(bins).hasSize(1); - assertThat(bins.get(0)).hasSize(3); - } - - @Test - void tablesOverLimit_twoBins() { - TableOperationRow r1 = row("a"); - TableOperationRow r2 = row("b"); - TableOperationRow r3 = row("c"); - - // 600k + 600k would exceed 1M; 400k fits after 600k - List> bins = - BinPacker.pack( - List.of(r1, r2, r3), Map.of("a", 600_000L, "b", 600_000L, "c", 400_000L), 1_000_000L); - - assertThat(bins).hasSize(2); - assertThat(bins.get(0)).hasSize(2); // 600k + 400k - assertThat(bins.get(1)).hasSize(1); // 600k alone - } - - @Test - void largeTableAlone_exceedsLimitSingleBin() { - TableOperationRow r = row("big"); - List> bins = - BinPacker.pack(List.of(r), Map.of("big", 5_000_000L), 1_000_000L); - - assertThat(bins).hasSize(1); - assertThat(bins.get(0)).containsExactly(r); - } - - @Test - void noStats_fileCountZero_groupedNormally() { - TableOperationRow r1 = row("x"); - TableOperationRow r2 = row("y"); - - // No stats entries — both get cost 0, both fit in one bin - List> bins = BinPacker.pack(List.of(r1, r2), Map.of(), 1_000_000L); - - assertThat(bins).hasSize(1); - assertThat(bins.get(0)).hasSize(2); - } - - @Test - void sortedDescending_largestTablesFirst() { - TableOperationRow small = row("small"); - TableOperationRow large = row("large"); - - List> bins = - BinPacker.pack(List.of(small, large), Map.of("small", 100L, "large", 900_000L), 1_000_000L); - - // Both fit in one bin, large should appear first due to descending sort - assertThat(bins).hasSize(1); - assertThat(bins.get(0).get(0).getTableUuid()).isEqualTo("large"); - assertThat(bins.get(0).get(1).getTableUuid()).isEqualTo("small"); - } -} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java new file mode 100644 index 000000000..cc77cdff1 --- /dev/null +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java @@ -0,0 +1,87 @@ +package com.linkedin.openhouse.scheduler; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; + +class FileCountBinPackerTest { + + private static final long MAX = 1_000_000L; + private final FileCountBinPacker packer = new FileCountBinPacker(MAX); + + private static TableOperation op(String uuid, Long fileCount) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid) + .databaseName("db") + .tableName("tbl_" + uuid) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .fileCount(fileCount) + .build(); + } + + @Test + void emptyInput_returnsEmptyBins() { + assertThat(packer.pack(List.of())).isEmpty(); + } + + @Test + void singleTable_oneBin() { + TableOperation o = op("uuid-1", 100L); + List> bins = packer.pack(List.of(o)); + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).containsExactly(o); + } + + @Test + void tablesUnderLimit_oneBin() { + TableOperation a = op("a", 300_000L); + TableOperation b = op("b", 300_000L); + TableOperation c = op("c", 300_000L); + List> bins = packer.pack(List.of(a, b, c)); + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).hasSize(3); + } + + @Test + void tablesOverLimit_twoBins() { + TableOperation a = op("a", 600_000L); + TableOperation b = op("b", 600_000L); + TableOperation c = op("c", 400_000L); + List> bins = packer.pack(List.of(a, b, c)); + assertThat(bins).hasSize(2); + assertThat(bins.get(0)).hasSize(2); // 600k + 400k + assertThat(bins.get(1)).hasSize(1); // 600k alone + } + + @Test + void largeTableAlone_exceedsLimitSingleBin() { + TableOperation big = op("big", 5_000_000L); + List> bins = packer.pack(List.of(big)); + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).containsExactly(big); + } + + @Test + void nullFileCount_treatedAsZero() { + TableOperation x = op("x", null); + TableOperation y = op("y", null); + List> bins = packer.pack(List.of(x, y)); + assertThat(bins).hasSize(1); + assertThat(bins.get(0)).hasSize(2); + } + + @Test + void sortedDescending_largestFirst() { + TableOperation small = op("small", 100L); + TableOperation large = op("large", 900_000L); + List> bins = packer.pack(List.of(small, large)); + assertThat(bins).hasSize(1); + assertThat(bins.get(0).get(0).getTableUuid()).isEqualTo("large"); + assertThat(bins.get(0).get(1).getTableUuid()).isEqualTo("small"); + } +} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index bc3578572..01cd45492 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -3,28 +3,28 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyList; -import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.UUID; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.ArgumentCaptor; -import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.test.util.ReflectionTestUtils; @@ -32,16 +32,19 @@ @ExtendWith(MockitoExtension.class) class SchedulerRunnerTest { + private static final OperationType OFD = OperationType.ORPHAN_FILES_DELETION; + private static final String OFD_STR = OFD.name(); + @Mock private TableOperationsRepository operationsRepo; @Mock private TableStatsRepository statsRepo; @Mock private JobsServiceClient jobsClient; + @Mock private BinPacker binPacker; - @InjectMocks private SchedulerRunner runner; + private SchedulerRunner runner; @BeforeEach - void injectConfig() { - ReflectionTestUtils.setField(runner, "maxFiles", 1_000_000L); - ReflectionTestUtils.setField(runner, "operationType", "ORPHAN_FILES_DELETION"); + void setUp() { + runner = new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker)); ReflectionTestUtils.setField( runner, "resultsEndpoint", "http://localhost:8080/v1/table-operations"); } @@ -52,9 +55,10 @@ private TableOperationRow pendingRow(String uuid, String db, String table) { .tableUuid(uuid) .databaseName(db) .tableName(table) - .operationType("ORPHAN_FILES_DELETION") + .operationType(OFD_STR) .status("PENDING") .version(0L) + .createdAt(java.time.Instant.now()) .build(); } @@ -68,79 +72,92 @@ private TableStatsRow statsRow(String uuid, long numCurrentFiles) { @Test void schedule_noPendingOps_noJobSubmitted() { - when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) - .thenReturn(List.of()); + when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of()); + + runner.schedule(OFD); + + verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); + verify(binPacker, never()).pack(anyList()); + } + + @Test + void schedule_unknownOperationType_noOp() { + SchedulerRunner emptyRunner = + new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of()); + ReflectionTestUtils.setField(emptyRunner, "resultsEndpoint", "x"); - runner.schedule(); + emptyRunner.schedule(OFD); + verify(operationsRepo, never()).find(anyString(), anyString(), any(), any(), any()); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); } @Test - void schedule_twoStepClaim_claimsAndSchedules() { + void schedule_singleBin_claimsAndMarksScheduled() { String uuid = UUID.randomUUID().toString(); TableOperationRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) - .thenReturn(List.of(row)); + when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); - when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); - when(operationsRepo.markScheduled(anyString(), anyLong(), anyString())).thenReturn(1); + when(binPacker.pack(anyList())) + .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); + when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-123")); - runner.schedule(); + runner.schedule(OFD); + + ArgumentCaptor> ids = ArgumentCaptor.forClass(List.class); + verify(operationsRepo).markSchedulingBatch(ids.capture(), any()); + assertThat(ids.getValue()).containsExactly(row.getId()); - // Step 1: PENDING → SCHEDULING - verify(operationsRepo).markScheduling(eq(row.getId()), eq(0L), any()); - // Step 2: SCHEDULING → SCHEDULED with jobId - verify(operationsRepo).markScheduled(eq(row.getId()), eq(1L), eq("job-123")); + verify(operationsRepo).markScheduledBatch(eq(List.of(row.getId())), eq("job-123")); + verify(operationsRepo, never()).markPendingBatch(anyList()); - ArgumentCaptor> tableNamesCaptor = ArgumentCaptor.forClass(List.class); - ArgumentCaptor> opIdsCaptor = ArgumentCaptor.forClass(List.class); + ArgumentCaptor> tableNames = ArgumentCaptor.forClass(List.class); verify(jobsClient) - .launch( - anyString(), - eq("ORPHAN_FILES_DELETION"), - tableNamesCaptor.capture(), - opIdsCaptor.capture(), - anyString()); - - assertThat(tableNamesCaptor.getValue()).containsExactly("db1.tbl1"); - assertThat(opIdsCaptor.getValue()).containsExactly(row.getId()); + .launch(anyString(), eq(OFD_STR), tableNames.capture(), anyList(), anyString()); + assertThat(tableNames.getValue()).containsExactly("db1.tbl1"); } @Test - void schedule_jobLaunchFails_rowsStayScheduling() { + void schedule_jobLaunchFails_marksPendingForRetry() { String uuid = UUID.randomUUID().toString(); TableOperationRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) - .thenReturn(List.of(row)); + when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); + when(binPacker.pack(anyList())) + .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.empty()); + when(operationsRepo.markPendingBatch(anyList())).thenReturn(1); - runner.schedule(); + runner.schedule(OFD); - verify(operationsRepo).markScheduling(eq(row.getId()), eq(0L), any()); - verify(operationsRepo, never()).markScheduled(anyString(), anyLong(), anyString()); + verify(operationsRepo).markSchedulingBatch(eq(List.of(row.getId())), any()); + verify(operationsRepo).markPendingBatch(eq(List.of(row.getId()))); + verify(operationsRepo, never()).markScheduledBatch(anyList(), anyString()); } @Test - void schedule_rowAlreadyClaimed_skipsSubmit() { + void schedule_rowsAlreadyClaimed_skipsSubmit() { String uuid = UUID.randomUUID().toString(); TableOperationRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) - .thenReturn(List.of(row)); + when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(0); + when(binPacker.pack(anyList())) + .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(0); - runner.schedule(); + runner.schedule(OFD); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); + verify(operationsRepo, never()).markScheduledBatch(anyList(), anyString()); + verify(operationsRepo, never()).markPendingBatch(anyList()); } @Test @@ -149,38 +166,17 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { TableOperationRow row1 = pendingRow(uuid, "db1", "tbl1"); TableOperationRow row2 = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) - .thenReturn(List.of(row1, row2)); + when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row1, row2)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); - when(operationsRepo.markScheduled(anyString(), anyLong(), anyString())).thenReturn(1); + when(binPacker.pack(anyList())) + .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(2); + when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(2); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-789")); - runner.schedule(); - - verify(operationsRepo) - .cancelDuplicatePending(eq(uuid), eq("ORPHAN_FILES_DELETION"), eq(row1.getId())); - } - - @Test - void schedule_claimsAllRowsInBin() { - String uuid1 = UUID.randomUUID().toString(); - String uuid2 = UUID.randomUUID().toString(); - TableOperationRow row1 = pendingRow(uuid1, "db1", "tbl1"); - TableOperationRow row2 = pendingRow(uuid2, "db1", "tbl2"); - - when(operationsRepo.find("ORPHAN_FILES_DELETION", "PENDING", null, null, null)) - .thenReturn(List.of(row1, row2)); - when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(operationsRepo.markScheduling(anyString(), anyLong(), any())).thenReturn(1); - when(operationsRepo.markScheduled(anyString(), anyLong(), anyString())).thenReturn(1); - when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) - .thenReturn(Optional.of("job-456")); - - runner.schedule(); + runner.schedule(OFD); - verify(operationsRepo, times(2)).markScheduling(anyString(), eq(0L), any()); - verify(operationsRepo, times(2)).markScheduled(anyString(), eq(1L), eq("job-456")); + verify(operationsRepo).cancelDuplicatePendingBatch(eq(OFD_STR), anyList()); } } diff --git a/apps/optimizer-scheduler/src/test/resources/application-test.properties b/apps/optimizer-scheduler/src/test/resources/application-test.properties index 3dcec13da..5904d71ba 100644 --- a/apps/optimizer-scheduler/src/test/resources/application-test.properties +++ b/apps/optimizer-scheduler/src/test/resources/application-test.properties @@ -5,7 +5,6 @@ spring.jpa.hibernate.ddl-auto=none spring.sql.init.mode=always spring.sql.init.schema-locations=classpath:schema.sql jobs.base-uri=http://localhost:9999 -scheduler.bin-size-max-files=1000000 -scheduler.operation-type=ORPHAN_FILES_DELETION +scheduler.ofd.max-files-per-bin=1000000 scheduler.results-endpoint=http://localhost:8080/v1/table-operations scheduler.cluster-id=test-cluster diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 0ba241b97..26d2d8c1e 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -70,4 +70,53 @@ int markScheduling( + "WHERE r.id = :id AND r.version = :version AND r.status = 'SCHEDULING'") int markScheduled( @Param("id") String id, @Param("version") long version, @Param("jobId") String jobId); + + /** + * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of + * rows transitioned. Rows already claimed by another instance are skipped silently; callers must + * re-query if they need the precise list. + */ + @Modifying + @Query( + "UPDATE TableOperationRow r " + + "SET r.status = 'SCHEDULING', r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + "WHERE r.id IN :ids AND r.status = 'PENDING'") + int markSchedulingBatch( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + + /** + * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in + * SCHEDULING. Returns the number of rows transitioned. + */ + @Modifying + @Query( + "UPDATE TableOperationRow r " + + "SET r.status = 'SCHEDULED', r.jobId = :jobId, r.version = r.version + 1 " + + "WHERE r.id IN :ids AND r.status = 'SCHEDULING'") + int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + + /** + * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the + * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns + * the number of rows reverted. + */ + @Modifying + @Query( + "UPDATE TableOperationRow r " + + "SET r.status = 'PENDING', r.scheduledAt = NULL, r.version = r.version + 1 " + + "WHERE r.id IN :ids AND r.status = 'SCHEDULING'") + int markPendingBatch(@Param("ids") List ids); + + /** + * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in + * {@code keepIds}. Used by the scheduler to deduplicate before claiming. + */ + @Modifying + @Query( + "DELETE FROM TableOperationRow r " + + "WHERE r.operationType = :operationType " + + "AND r.status = 'PENDING' " + + "AND r.id NOT IN :keepIds") + int cancelDuplicatePendingBatch( + @Param("operationType") String operationType, @Param("keepIds") List keepIds); } From 8142af3d242e87d0ef1020ae0787031647fc3b2e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:13:05 -0700 Subject: [PATCH 03/19] refactor(optimizer-scheduler): depend on :services:optimizer; drop dead CAS methods - apps/optimizer-scheduler/build.gradle: swap the now-removed `:apps:optimizer-data` dep for `:services:optimizer`. - TableOperationsRepository: remove the single-row `markScheduling`, `markScheduled`, and `cancelDuplicatePending` methods. They have no production callers; the scheduler exclusively uses the `*Batch` variants. Update the javadoc on TableOperationsRow.version accordingly. --- apps/optimizer-scheduler/build.gradle | 2 +- .../optimizer/entity/TableOperationsRow.java | 9 ++-- .../repository/TableOperationsRepository.java | 42 ------------------- 3 files changed, 6 insertions(+), 47 deletions(-) diff --git a/apps/optimizer-scheduler/build.gradle b/apps/optimizer-scheduler/build.gradle index 391b400c3..48def1c0c 100644 --- a/apps/optimizer-scheduler/build.gradle +++ b/apps/optimizer-scheduler/build.gradle @@ -4,7 +4,7 @@ plugins { } dependencies { - implementation project(':apps:optimizer-data') + implementation project(':services:optimizer') implementation 'org.springframework.boot:spring-boot-starter:2.7.8' implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 0e23761ae..a5444ab5a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -51,10 +51,11 @@ public class TableOperationsRow { /** * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this - * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't - * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain - * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. + * scheduler's batch CAS transitions (e.g. {@code markSchedulingBatch}, {@code + * markScheduledBatch}) match this value in the WHERE clause and bump it by one on UPDATE, + * ensuring two scheduler instances can't both move the same row out of PENDING. Not managed by + * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works + * portably across MySQL and H2. */ @Column(name = "version") private Long version; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 7d580df3d..1f93c5740 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -29,48 +29,6 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName); - /** - * Delete duplicate PENDING rows for the same (tableUuid, operationType), keeping only the - * specified row. Used by the Scheduler to deduplicate before claiming. - */ - @Modifying - @Query( - "DELETE FROM TableOperationsRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND r.operationType = :operationType " - + "AND r.status = 'PENDING' " - + "AND r.id <> :keepId") - int cancelDuplicatePending( - @Param("tableUuid") String tableUuid, - @Param("operationType") String operationType, - @Param("keepId") String keepId); - - /** - * CAS transition: PENDING → SCHEDULING. Returns 1 if the row was claimed, 0 if already claimed by - * another instance or the version has changed. - */ - @Modifying - @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = 'SCHEDULING', r.scheduledAt = :scheduledAt, r.version = r.version + 1 " - + "WHERE r.id = :id AND r.version = :version AND r.status = 'PENDING'") - int markScheduling( - @Param("id") String id, - @Param("version") long version, - @Param("scheduledAt") Instant scheduledAt); - - /** - * CAS transition: SCHEDULING → SCHEDULED with the external job ID. Returns 1 on success, 0 if the - * row is no longer in SCHEDULING state at the expected version. - */ - @Modifying - @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = 'SCHEDULED', r.jobId = :jobId, r.version = r.version + 1 " - + "WHERE r.id = :id AND r.version = :version AND r.status = 'SCHEDULING'") - int markScheduled( - @Param("id") String id, @Param("version") long version, @Param("jobId") String jobId); - /** * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of * rows transitioned. Rows already claimed by another instance are skipped silently; callers must From 3fee7abba4e043d6539a01bde9c2055c4a5f5808 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:12:41 -0700 Subject: [PATCH 04/19] refactor(optimizer-scheduler): consume model/ types and ModelDbMapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt the scheduler to the entity→db rename, the removal of factory methods on model/ data types, and the db/-typed repository signatures. - SchedulerRunner: inject ModelDbMapper. Imports switch from entity/ to db/. operationsRepo.find now takes (db.OperationType, db.OperationStatus, ...). Row → model conversion via dbMapper.toOperation. extractFileCount reads row.getSnapshot() directly — the row no longer wraps a TableStats envelope. cancelDuplicatePendingBatch takes a db.OperationType. - SchedulerRunnerTest rewritten: db-typed enums + new ModelDbMapper. --- .../openhouse/scheduler/SchedulerRunner.java | 20 +++++---- .../scheduler/SchedulerRunnerTest.java | 45 +++++++++++-------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index ccc4e0335..590ffddb6 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -1,9 +1,10 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -34,6 +35,7 @@ public class SchedulerRunner { private final TableStatsRepository statsRepo; private final JobsServiceClient jobsClient; private final Map binPackers; + private final ModelDbMapper dbMapper; @Value("${scheduler.results-endpoint}") private String resultsEndpoint; @@ -57,10 +59,12 @@ public void schedule( return; } + com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = + dbMapper.toDbOperationType(operationType); List pendingRows = operationsRepo.find( - operationType.name(), - "PENDING", + dbOperationType, + com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING, null, databaseName.orElse(null), tableName.orElse(null)); @@ -80,7 +84,7 @@ public void schedule( pendingRows.stream() .map( row -> { - TableOperation op = TableOperation.from(row); + TableOperation op = dbMapper.toOperation(row); op.setFileCount(fileCountByUuid.getOrDefault(row.getTableUuid(), 0L)); return op; }) @@ -96,7 +100,7 @@ public void schedule( private void submitBin(OperationType operationType, List bin) { // Deduplicate PENDING rows per tableUuid for this op type, keeping the IDs in this bin. List keepIds = bin.stream().map(TableOperation::getId).collect(Collectors.toList()); - operationsRepo.cancelDuplicatePendingBatch(operationType.name(), keepIds); + operationsRepo.cancelDuplicatePendingBatch(dbMapper.toDbOperationType(operationType), keepIds); // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. int claimedCount = operationsRepo.markSchedulingBatch(keepIds, Instant.now()); @@ -132,10 +136,10 @@ private void submitBin(OperationType operationType, List bin) { } private static long extractFileCount(TableStatsRow row) { - if (row.getStats() == null || row.getStats().getSnapshot() == null) { + if (row.getSnapshot() == null) { return 0L; } - Long count = row.getStats().getSnapshot().getNumCurrentFiles(); + Long count = row.getSnapshot().getNumCurrentFiles(); return count != null ? count : 0L; } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 6a744a36f..74db76438 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -9,11 +9,12 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -33,6 +34,10 @@ class SchedulerRunnerTest { private static final OperationType OFD = OperationType.ORPHAN_FILES_DELETION; + private static final com.linkedin.openhouse.optimizer.db.OperationType OFD_DB = + com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION; + private static final com.linkedin.openhouse.optimizer.db.OperationStatus PENDING_DB = + com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING; private static final String OFD_STR = OFD.name(); @Mock private TableOperationsRepository operationsRepo; @@ -40,11 +45,14 @@ class SchedulerRunnerTest { @Mock private JobsServiceClient jobsClient; @Mock private BinPacker binPacker; + private final ModelDbMapper dbMapper = new ModelDbMapper(); private SchedulerRunner runner; @BeforeEach void setUp() { - runner = new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker)); + runner = + new SchedulerRunner( + operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker), dbMapper); ReflectionTestUtils.setField( runner, "resultsEndpoint", "http://localhost:8080/v1/table-operations"); } @@ -55,24 +63,23 @@ private TableOperationsRow pendingRow(String uuid, String db, String table) { .tableUuid(uuid) .databaseName(db) .tableName(table) - .operationType(OFD_STR) - .status("PENDING") + .operationType(OFD_DB) + .status(PENDING_DB) .version(0L) .createdAt(java.time.Instant.now()) .build(); } private TableStatsRow statsRow(String uuid, long numCurrentFiles) { - TableStats stats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(numCurrentFiles).build()) - .build(); - return TableStatsRow.builder().tableUuid(uuid).stats(stats).build(); + return TableStatsRow.builder() + .tableUuid(uuid) + .snapshot(SnapshotMetrics.builder().numCurrentFiles(numCurrentFiles).build()) + .build(); } @Test void schedule_noPendingOps_noJobSubmitted() { - when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of()); + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of()); runner.schedule(OFD); @@ -83,12 +90,12 @@ void schedule_noPendingOps_noJobSubmitted() { @Test void schedule_unknownOperationType_noOp() { SchedulerRunner emptyRunner = - new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of()); + new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(), dbMapper); ReflectionTestUtils.setField(emptyRunner, "resultsEndpoint", "x"); emptyRunner.schedule(OFD); - verify(operationsRepo, never()).find(anyString(), anyString(), any(), any(), any()); + verify(operationsRepo, never()).find(any(), any(), any(), any(), any()); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); } @@ -97,7 +104,7 @@ void schedule_singleBin_claimsAndMarksScheduled() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row)); + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); when(binPacker.pack(anyList())) .thenAnswer(inv -> List.of(inv.>getArgument(0))); @@ -126,7 +133,7 @@ void schedule_jobLaunchFails_marksPendingForRetry() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row)); + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); when(binPacker.pack(anyList())) .thenAnswer(inv -> List.of(inv.>getArgument(0))); @@ -147,7 +154,7 @@ void schedule_rowsAlreadyClaimed_skipsSubmit() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row)); + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); when(binPacker.pack(anyList())) .thenAnswer(inv -> List.of(inv.>getArgument(0))); @@ -166,7 +173,7 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { TableOperationsRow row1 = pendingRow(uuid, "db1", "tbl1"); TableOperationsRow row2 = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_STR, "PENDING", null, null, null)).thenReturn(List.of(row1, row2)); + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row1, row2)); when(statsRepo.findAllById(any())).thenReturn(List.of()); when(binPacker.pack(anyList())) .thenAnswer(inv -> List.of(inv.>getArgument(0))); @@ -177,6 +184,6 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { runner.schedule(OFD); - verify(operationsRepo).cancelDuplicatePendingBatch(eq(OFD_STR), anyList()); + verify(operationsRepo).cancelDuplicatePendingBatch(eq(OFD_DB), anyList()); } } From b2fd3213448677812659e09ba834e21c21a1a9d1 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:35:48 -0700 Subject: [PATCH 05/19] fix(optimizer-scheduler): drop .version(0L) from SchedulerRunnerTest TableOperationsRow no longer carries a `version` field (the batch CAS pattern's atomicity is on status alone). The mock-built test row was still calling .version(0L), which now refers to a removed field. --- .../com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 74db76438..924ec432e 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -65,7 +65,6 @@ private TableOperationsRow pendingRow(String uuid, String db, String table) { .tableName(table) .operationType(OFD_DB) .status(PENDING_DB) - .version(0L) .createdAt(java.time.Instant.now()) .build(); } From de9b0e1274a78cf345377dce0447b59d1f98dc0f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:14:50 -0700 Subject: [PATCH 06/19] refactor(optimizer-scheduler): use type to/from methods; drop ModelDbMapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SchedulerRunner no longer injects ModelDbMapper. Conversion goes through the model types' static factories and instance methods: - Row → model: TableOperation.fromRow. - model.OperationType → db.OperationType via operationType.toDb(). Test: drop the ModelDbMapper field and the extra constructor argument. --- .../linkedin/openhouse/scheduler/SchedulerRunner.java | 9 +++------ .../openhouse/scheduler/SchedulerRunnerTest.java | 8 ++------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 590ffddb6..11d44efd9 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -4,7 +4,6 @@ import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -35,7 +34,6 @@ public class SchedulerRunner { private final TableStatsRepository statsRepo; private final JobsServiceClient jobsClient; private final Map binPackers; - private final ModelDbMapper dbMapper; @Value("${scheduler.results-endpoint}") private String resultsEndpoint; @@ -59,8 +57,7 @@ public void schedule( return; } - com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = - dbMapper.toDbOperationType(operationType); + com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = operationType.toDb(); List pendingRows = operationsRepo.find( dbOperationType, @@ -84,7 +81,7 @@ public void schedule( pendingRows.stream() .map( row -> { - TableOperation op = dbMapper.toOperation(row); + TableOperation op = TableOperation.fromRow(row); op.setFileCount(fileCountByUuid.getOrDefault(row.getTableUuid(), 0L)); return op; }) @@ -100,7 +97,7 @@ public void schedule( private void submitBin(OperationType operationType, List bin) { // Deduplicate PENDING rows per tableUuid for this op type, keeping the IDs in this bin. List keepIds = bin.stream().map(TableOperation::getId).collect(Collectors.toList()); - operationsRepo.cancelDuplicatePendingBatch(dbMapper.toDbOperationType(operationType), keepIds); + operationsRepo.cancelDuplicatePendingBatch(operationType.toDb(), keepIds); // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. int claimedCount = operationsRepo.markSchedulingBatch(keepIds, Instant.now()); diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 924ec432e..7b0c47cac 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -14,7 +14,6 @@ import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -45,14 +44,11 @@ class SchedulerRunnerTest { @Mock private JobsServiceClient jobsClient; @Mock private BinPacker binPacker; - private final ModelDbMapper dbMapper = new ModelDbMapper(); private SchedulerRunner runner; @BeforeEach void setUp() { - runner = - new SchedulerRunner( - operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker), dbMapper); + runner = new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker)); ReflectionTestUtils.setField( runner, "resultsEndpoint", "http://localhost:8080/v1/table-operations"); } @@ -89,7 +85,7 @@ void schedule_noPendingOps_noJobSubmitted() { @Test void schedule_unknownOperationType_noOp() { SchedulerRunner emptyRunner = - new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(), dbMapper); + new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of()); ReflectionTestUtils.setField(emptyRunner, "resultsEndpoint", "x"); emptyRunner.schedule(OFD); From 9ad8861ac2fa0951ee479cb74e6d6abdd82f86b2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:42:11 -0700 Subject: [PATCH 07/19] refactor(scheduler): TableStats at BinPacker interface; Bin owns its own launch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SchedulerRunner now looks up per-table stats from TableStatsRepository at scheduling time and passes the (operations, statsByUuid) pair to BinPacker. TableOperation no longer carries a fileCount enrichment field. - BinPacker.pack(...) returns List. Bin owns the operations it groups together and has a schedule(jobsClient, resultsEndpoint) method that submits a single Spark job and returns the job id. The scheduler still handles cancel-duplicates, claim, and mark-scheduled/pending around the Bin.schedule() call. - FileCountBinPacker takes its OperationType in the constructor and projects TableStats down to a per-op long cost; the full stats payload is not retained in the returned bins. - SchedulerRunner imports db.OperationStatus instead of using FQN; the db OperationType collides with model.OperationType so it stays out of the file entirely — flow-through via operationType.toDb() at the call site. Addresses PR #534 review comments on SchedulerRunner.java:64 (imports), :75 (stats-at-interface), and BinPacker.java:19 (Bin abstraction). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/linkedin/openhouse/scheduler/Bin.java | 47 ++++++++ .../openhouse/scheduler/BinPacker.java | 14 ++- .../scheduler/FileCountBinPacker.java | 45 +++++--- .../openhouse/scheduler/SchedulerRunner.java | 69 ++++-------- .../scheduler/config/SchedulerConfig.java | 4 +- .../scheduler/FileCountBinPackerTest.java | 106 +++++++++++++----- .../scheduler/SchedulerRunnerTest.java | 19 ++-- 7 files changed, 201 insertions(+), 103 deletions(-) create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java new file mode 100644 index 000000000..96c480eba --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java @@ -0,0 +1,47 @@ +package com.linkedin.openhouse.scheduler; + +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.scheduler.client.JobsServiceClient; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * A set of operations the scheduler will submit together as a single Spark job. A bin owns its own + * launch — callers ask it to schedule itself and react to the returned job id. The surrounding + * status-update machinery (claim, mark-scheduled, revert-to-pending) lives in the scheduler because + * it is shared across all bins regardless of operation type. + */ +@RequiredArgsConstructor +public class Bin { + + @Getter private final OperationType operationType; + @Getter private final List operations; + + /** Operation UUIDs in this bin, parallel to {@link #getTableNames()}. */ + public List getOperationIds() { + return operations.stream().map(TableOperation::getId).collect(Collectors.toList()); + } + + /** Fully-qualified {@code database.table} identifiers for the operations in this bin. */ + public List getTableNames() { + return operations.stream() + .map(op -> op.getDatabaseName() + "." + op.getTableName()) + .collect(Collectors.toList()); + } + + /** + * Submit this bin as a single Spark job. Returns the job id on success, or empty on submission + * failure — the caller is responsible for the surrounding status updates. + */ + public Optional schedule(JobsServiceClient client, String resultsEndpoint) { + String jobName = + "batched-" + operationType.name().toLowerCase() + "-" + Instant.now().toEpochMilli(); + return client.launch( + jobName, operationType.name(), getTableNames(), getOperationIds(), resultsEndpoint); + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java index 91e57305d..a33df289d 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java @@ -1,20 +1,26 @@ package com.linkedin.openhouse.scheduler; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.List; +import java.util.Map; /** * Strategy for packing a set of operations into bins for batched job submission. Implementations * encode the constraints of a particular packing dimension (file count, partition count, etc.); * binding to an operation type is the responsibility of the scheduler configuration, not the * strategy class. + * + *

{@link TableStats} is the cost source at the interface boundary. Implementations project it + * down to the minimal data needed to make their packing decision (e.g. file count for OFD) and do + * not retain the full stats payload in the returned bins. */ public interface BinPacker { /** - * Pack {@code pending} into one or more bins. Each returned bin is a non-empty list of - * operations. The relative order of operations within a bin is implementation-defined; the - * returned list of bins is what the scheduler dispatches one job per. + * Pack {@code pending} into one or more {@link Bin}s, using {@code statsByUuid} (keyed by {@link + * TableOperation#getTableUuid()}) as the cost source. Each returned bin is non-empty; the + * scheduler dispatches one Spark job per bin. */ - List> pack(List pending); + List pack(List pending, Map statsByUuid); } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index 64e14dc9f..08b21dafd 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -1,63 +1,82 @@ package com.linkedin.openhouse.scheduler; +import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.ArrayList; import java.util.Comparator; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; /** - * Greedy first-fit-descending bin-packer keyed on {@link TableOperation#getFileCount()}. + * Greedy first-fit-descending bin-packer keyed on per-table file count. File count is projected + * from the per-table {@link TableStats} passed alongside the operation list; operations whose stats + * are missing or whose snapshot reports no file count are treated as zero. * *

Operations are sorted by descending file count, then assigned to the first bin whose running * total stays at or below {@code maxFilesPerBin}. An operation larger than the limit gets its own * bin (oversized bins are allowed — we never drop an operation). - * - *

Operations with no file-count populated are treated as zero. */ @RequiredArgsConstructor public class FileCountBinPacker implements BinPacker { + private final OperationType operationType; private final long maxFilesPerBin; @Override - public List> pack(List pending) { + public List pack(List pending, Map statsByUuid) { if (pending.isEmpty()) { return List.of(); } + // Project once: each operation's packing cost is just a long. + Map costByOperationId = + pending.stream() + .collect(Collectors.toMap(TableOperation::getId, op -> cost(op, statsByUuid))); + List sorted = pending.stream() - .sorted(Comparator.comparingLong(FileCountBinPacker::cost).reversed()) + .sorted( + Comparator.comparingLong((TableOperation op) -> costByOperationId.get(op.getId())) + .reversed()) .collect(Collectors.toList()); - List> bins = new ArrayList<>(); + List> binContents = new ArrayList<>(); List binTotals = new ArrayList<>(); for (TableOperation op : sorted) { - long cost = cost(op); + long cost = costByOperationId.get(op.getId()); int placed = -1; - for (int i = 0; i < bins.size(); i++) { + for (int i = 0; i < binContents.size(); i++) { if (binTotals.get(i) + cost <= maxFilesPerBin || binTotals.get(i) == 0) { placed = i; break; } } if (placed >= 0) { - bins.get(placed).add(op); + binContents.get(placed).add(op); binTotals.set(placed, binTotals.get(placed) + cost); } else { List newBin = new ArrayList<>(); newBin.add(op); - bins.add(newBin); + binContents.add(newBin); binTotals.add(cost); } } - return bins; + + return binContents.stream() + .map(ops -> new Bin(operationType, ops)) + .collect(Collectors.toList()); } - private static long cost(TableOperation op) { - return op.getFileCount() != null ? op.getFileCount() : 0L; + private static long cost(TableOperation op, Map statsByUuid) { + TableStats stats = statsByUuid.get(op.getTableUuid()); + if (stats == null || stats.getSnapshot() == null) { + return 0L; + } + Long n = stats.getSnapshot().getNumCurrentFiles(); + return n != null ? n : 0L; } } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 11d44efd9..6914001fc 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -1,9 +1,11 @@ package com.linkedin.openhouse.scheduler; +import com.linkedin.openhouse.optimizer.db.OperationStatus; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -20,8 +22,8 @@ import org.springframework.transaction.annotation.Transactional; /** - * For one operation type per call, reads PENDING rows, enriches them with file count, dispatches to - * the registered {@link BinPacker}, and submits one Spark job per bin. The {@link + * For one operation type per call, reads PENDING rows, looks up per-table stats, dispatches to the + * registered {@link BinPacker}, and submits one Spark job per returned {@link Bin}. The {@link * com.linkedin.openhouse.scheduler.SchedulerApplication}'s CommandLineRunner loops over the * registered packers and invokes {@code schedule(opType)} for each. */ @@ -57,11 +59,10 @@ public void schedule( return; } - com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = operationType.toDb(); List pendingRows = operationsRepo.find( - dbOperationType, - com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING, + operationType.toDb(), + OperationStatus.PENDING, null, databaseName.orElse(null), tableName.orElse(null)); @@ -70,73 +71,49 @@ public void schedule( return; } + List pending = + pendingRows.stream().map(TableOperation::fromRow).collect(Collectors.toList()); + Set uuids = - pendingRows.stream().map(TableOperationsRow::getTableUuid).collect(Collectors.toSet()); - Map fileCountByUuid = + pending.stream().map(TableOperation::getTableUuid).collect(Collectors.toSet()); + Map statsByUuid = statsRepo.findAllById(uuids).stream() - .collect( - Collectors.toMap(TableStatsRow::getTableUuid, SchedulerRunner::extractFileCount)); + .collect(Collectors.toMap(TableStatsRow::getTableUuid, TableStats::fromRow)); - List pending = - pendingRows.stream() - .map( - row -> { - TableOperation op = TableOperation.fromRow(row); - op.setFileCount(fileCountByUuid.getOrDefault(row.getTableUuid(), 0L)); - return op; - }) - .collect(Collectors.toList()); - - List> bins = packer.pack(pending); + List bins = packer.pack(pending, statsByUuid); log.info( "Packed {} PENDING {} operations into {} bins", pending.size(), operationType, bins.size()); - bins.forEach(bin -> submitBin(operationType, bin)); + bins.forEach(this::submitBin); } - private void submitBin(OperationType operationType, List bin) { + private void submitBin(Bin bin) { + List ids = bin.getOperationIds(); + // Deduplicate PENDING rows per tableUuid for this op type, keeping the IDs in this bin. - List keepIds = bin.stream().map(TableOperation::getId).collect(Collectors.toList()); - operationsRepo.cancelDuplicatePendingBatch(operationType.toDb(), keepIds); + operationsRepo.cancelDuplicatePendingBatch(bin.getOperationType().toDb(), ids); // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. - int claimedCount = operationsRepo.markSchedulingBatch(keepIds, Instant.now()); + int claimedCount = operationsRepo.markSchedulingBatch(ids, Instant.now()); if (claimedCount == 0) { log.info("All rows in bin already claimed by another scheduler instance; skipping"); return; } - // Submit the job for the rows we just claimed. - List tableNames = - bin.stream() - .map(op -> op.getDatabaseName() + "." + op.getTableName()) - .collect(Collectors.toList()); - String jobName = - "batched-" + operationType.name().toLowerCase() + "-" + Instant.now().toEpochMilli(); - Optional jobId = - jobsClient.launch(jobName, operationType.name(), tableNames, keepIds, resultsEndpoint); - + Optional jobId = bin.schedule(jobsClient, resultsEndpoint); if (jobId.isPresent()) { - int updated = operationsRepo.markScheduledBatch(keepIds, jobId.get()); + int updated = operationsRepo.markScheduledBatch(ids, jobId.get()); log.info( "Submitted job {} for {} tables ({} rows marked SCHEDULED)", jobId.get(), - tableNames.size(), + bin.getOperations().size(), updated); } else { - int reverted = operationsRepo.markPendingBatch(keepIds); + int reverted = operationsRepo.markPendingBatch(ids); log.warn( "Job submission failed; reverted {} claimed rows back to PENDING for retry on the next" + " pass", reverted); } } - - private static long extractFileCount(TableStatsRow row) { - if (row.getSnapshot() == null) { - return 0L; - } - Long count = row.getSnapshot().getNumCurrentFiles(); - return count != null ? count : 0L; - } } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java index 6ee16b42a..56799f433 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java @@ -39,6 +39,8 @@ public JobsServiceClient jobsServiceClient(WebClient jobsWebClient) { */ @Bean public Map binPackers() { - return Map.of(OperationType.ORPHAN_FILES_DELETION, new FileCountBinPacker(ofdMaxFilesPerBin)); + return Map.of( + OperationType.ORPHAN_FILES_DELETION, + new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin)); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java index cc77cdff1..ce79a7bcd 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java @@ -4,84 +4,130 @@ import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.UUID; +import java.util.stream.Collectors; import org.junit.jupiter.api.Test; class FileCountBinPackerTest { private static final long MAX = 1_000_000L; - private final FileCountBinPacker packer = new FileCountBinPacker(MAX); + private final FileCountBinPacker packer = + new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, MAX); - private static TableOperation op(String uuid, Long fileCount) { + private static TableOperation op(String uuid) { return TableOperation.builder() .id(UUID.randomUUID().toString()) .tableUuid(uuid) .databaseName("db") .tableName("tbl_" + uuid) .operationType(OperationType.ORPHAN_FILES_DELETION) - .fileCount(fileCount) .build(); } + private static TableStats stats(String uuid, Long fileCount) { + return TableStats.builder() + .tableUuid(uuid) + .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(fileCount).build()) + .build(); + } + + private static Map statsMap(TableStats... entries) { + Map m = new HashMap<>(); + for (TableStats s : entries) { + m.put(s.getTableUuid(), s); + } + return m; + } + @Test void emptyInput_returnsEmptyBins() { - assertThat(packer.pack(List.of())).isEmpty(); + assertThat(packer.pack(List.of(), Map.of())).isEmpty(); } @Test void singleTable_oneBin() { - TableOperation o = op("uuid-1", 100L); - List> bins = packer.pack(List.of(o)); + TableOperation o = op("uuid-1"); + List bins = packer.pack(List.of(o), statsMap(stats("uuid-1", 100L))); assertThat(bins).hasSize(1); - assertThat(bins.get(0)).containsExactly(o); + assertThat(bins.get(0).getOperations()).containsExactly(o); } @Test void tablesUnderLimit_oneBin() { - TableOperation a = op("a", 300_000L); - TableOperation b = op("b", 300_000L); - TableOperation c = op("c", 300_000L); - List> bins = packer.pack(List.of(a, b, c)); + TableOperation a = op("a"); + TableOperation b = op("b"); + TableOperation c = op("c"); + List bins = + packer.pack( + List.of(a, b, c), + statsMap(stats("a", 300_000L), stats("b", 300_000L), stats("c", 300_000L))); assertThat(bins).hasSize(1); - assertThat(bins.get(0)).hasSize(3); + assertThat(bins.get(0).getOperations()).hasSize(3); } @Test void tablesOverLimit_twoBins() { - TableOperation a = op("a", 600_000L); - TableOperation b = op("b", 600_000L); - TableOperation c = op("c", 400_000L); - List> bins = packer.pack(List.of(a, b, c)); + TableOperation a = op("a"); + TableOperation b = op("b"); + TableOperation c = op("c"); + List bins = + packer.pack( + List.of(a, b, c), + statsMap(stats("a", 600_000L), stats("b", 600_000L), stats("c", 400_000L))); assertThat(bins).hasSize(2); - assertThat(bins.get(0)).hasSize(2); // 600k + 400k - assertThat(bins.get(1)).hasSize(1); // 600k alone + assertThat(bins.get(0).getOperations()).hasSize(2); // 600k + 400k + assertThat(bins.get(1).getOperations()).hasSize(1); // 600k alone } @Test void largeTableAlone_exceedsLimitSingleBin() { - TableOperation big = op("big", 5_000_000L); - List> bins = packer.pack(List.of(big)); + TableOperation big = op("big"); + List bins = packer.pack(List.of(big), statsMap(stats("big", 5_000_000L))); assertThat(bins).hasSize(1); - assertThat(bins.get(0)).containsExactly(big); + assertThat(bins.get(0).getOperations()).containsExactly(big); + } + + @Test + void missingStats_treatedAsZero() { + TableOperation x = op("x"); + TableOperation y = op("y"); + List bins = packer.pack(List.of(x, y), Map.of()); + assertThat(bins).hasSize(1); + assertThat(bins.get(0).getOperations()).hasSize(2); } @Test void nullFileCount_treatedAsZero() { - TableOperation x = op("x", null); - TableOperation y = op("y", null); - List> bins = packer.pack(List.of(x, y)); + TableOperation x = op("x"); + TableOperation y = op("y"); + List bins = packer.pack(List.of(x, y), statsMap(stats("x", null), stats("y", null))); assertThat(bins).hasSize(1); - assertThat(bins.get(0)).hasSize(2); + assertThat(bins.get(0).getOperations()).hasSize(2); } @Test void sortedDescending_largestFirst() { - TableOperation small = op("small", 100L); - TableOperation large = op("large", 900_000L); - List> bins = packer.pack(List.of(small, large)); + TableOperation small = op("small"); + TableOperation large = op("large"); + List bins = + packer.pack( + List.of(small, large), statsMap(stats("small", 100L), stats("large", 900_000L))); assertThat(bins).hasSize(1); - assertThat(bins.get(0).get(0).getTableUuid()).isEqualTo("large"); - assertThat(bins.get(0).get(1).getTableUuid()).isEqualTo("small"); + List ordered = + bins.get(0).getOperations().stream() + .map(TableOperation::getTableUuid) + .collect(Collectors.toList()); + assertThat(ordered).containsExactly("large", "small"); + } + + @Test + void binCarriesOperationType() { + TableOperation o = op("u"); + List bins = packer.pack(List.of(o), statsMap(stats("u", 1L))); + assertThat(bins.get(0).getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 7b0c47cac..245dd902a 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -3,6 +3,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.never; @@ -79,7 +80,7 @@ void schedule_noPendingOps_noJobSubmitted() { runner.schedule(OFD); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); - verify(binPacker, never()).pack(anyList()); + verify(binPacker, never()).pack(anyList(), anyMap()); } @Test @@ -101,8 +102,8 @@ void schedule_singleBin_claimsAndMarksScheduled() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); - when(binPacker.pack(anyList())) - .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(binPacker.pack(anyList(), anyMap())) + .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) @@ -130,8 +131,8 @@ void schedule_jobLaunchFails_marksPendingForRetry() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList())) - .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(binPacker.pack(anyList(), anyMap())) + .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.empty()); @@ -151,8 +152,8 @@ void schedule_rowsAlreadyClaimed_skipsSubmit() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList())) - .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(binPacker.pack(anyList(), anyMap())) + .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(0); runner.schedule(OFD); @@ -170,8 +171,8 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row1, row2)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList())) - .thenAnswer(inv -> List.of(inv.>getArgument(0))); + when(binPacker.pack(anyList(), anyMap())) + .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(2); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(2); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) From a325684067ae1e3e8a3ce542779c6bee6eb36a98 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 12:44:47 -0700 Subject: [PATCH 08/19] refactor(scheduler): BinPacker takes List pairs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the two-arg pack(operations, statsByUuid) with a single-arg pack(List) where each candidate carries its own TableStats. The pairing is explicit at the join point in SchedulerRunner and the bin packer no longer assumes a uuid lookup map; null stats on a candidate are treated as zero cost. SchedulingCandidate is a Lombok @Value (Java 8 — no records). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/scheduler/BinPacker.java | 14 ++-- .../scheduler/FileCountBinPacker.java | 22 ++++--- .../openhouse/scheduler/SchedulerRunner.java | 7 +- .../scheduler/SchedulingCandidate.java | 18 ++++++ .../scheduler/FileCountBinPackerTest.java | 64 +++++++------------ .../scheduler/SchedulerRunnerTest.java | 48 ++++++++++---- 6 files changed, 102 insertions(+), 71 deletions(-) create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java index a33df289d..90ac2faf3 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java @@ -1,9 +1,7 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.List; -import java.util.Map; /** * Strategy for packing a set of operations into bins for batched job submission. Implementations @@ -11,16 +9,16 @@ * binding to an operation type is the responsibility of the scheduler configuration, not the * strategy class. * - *

{@link TableStats} is the cost source at the interface boundary. Implementations project it - * down to the minimal data needed to make their packing decision (e.g. file count for OFD) and do - * not retain the full stats payload in the returned bins. + *

{@link TableStats} is the cost source at the interface boundary, carried alongside each + * operation in a {@link SchedulingCandidate}. Implementations project the stats down to the minimal + * data needed to make their packing decision (e.g. file count for OFD) and do not retain the full + * stats payload in the returned bins. */ public interface BinPacker { /** - * Pack {@code pending} into one or more {@link Bin}s, using {@code statsByUuid} (keyed by {@link - * TableOperation#getTableUuid()}) as the cost source. Each returned bin is non-empty; the + * Pack {@code pending} into one or more {@link Bin}s. Each returned bin is non-empty; the * scheduler dispatches one Spark job per bin. */ - List pack(List pending, Map statsByUuid); + List pack(List pending); } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index 08b21dafd..e4bc5431a 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -5,6 +5,7 @@ import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -12,10 +13,10 @@ /** * Greedy first-fit-descending bin-packer keyed on per-table file count. File count is projected - * from the per-table {@link TableStats} passed alongside the operation list; operations whose stats - * are missing or whose snapshot reports no file count are treated as zero. + * from each candidate's {@link TableStats}; candidates whose stats are {@code null} or whose + * snapshot reports no file count are treated as zero. * - *

Operations are sorted by descending file count, then assigned to the first bin whose running + *

Candidates are sorted by descending file count, then assigned to the first bin whose running * total stays at or below {@code maxFilesPerBin}. An operation larger than the limit gets its own * bin (oversized bins are allowed — we never drop an operation). */ @@ -26,18 +27,20 @@ public class FileCountBinPacker implements BinPacker { private final long maxFilesPerBin; @Override - public List pack(List pending, Map statsByUuid) { + public List pack(List pending) { if (pending.isEmpty()) { return List.of(); } - // Project once: each operation's packing cost is just a long. - Map costByOperationId = - pending.stream() - .collect(Collectors.toMap(TableOperation::getId, op -> cost(op, statsByUuid))); + // Project once: each candidate's packing cost is just a long, keyed by operation id. + Map costByOperationId = new HashMap<>(); + for (SchedulingCandidate c : pending) { + costByOperationId.put(c.getOperation().getId(), cost(c.getStats())); + } List sorted = pending.stream() + .map(SchedulingCandidate::getOperation) .sorted( Comparator.comparingLong((TableOperation op) -> costByOperationId.get(op.getId())) .reversed()) @@ -71,8 +74,7 @@ public List pack(List pending, Map stat .collect(Collectors.toList()); } - private static long cost(TableOperation op, Map statsByUuid) { - TableStats stats = statsByUuid.get(op.getTableUuid()); + private static long cost(TableStats stats) { if (stats == null || stats.getSnapshot() == null) { return 0L; } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 6914001fc..a43b6dc16 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -80,7 +80,12 @@ public void schedule( statsRepo.findAllById(uuids).stream() .collect(Collectors.toMap(TableStatsRow::getTableUuid, TableStats::fromRow)); - List bins = packer.pack(pending, statsByUuid); + List candidates = + pending.stream() + .map(op -> new SchedulingCandidate(op, statsByUuid.get(op.getTableUuid()))) + .collect(Collectors.toList()); + + List bins = packer.pack(candidates); log.info( "Packed {} PENDING {} operations into {} bins", pending.size(), operationType, bins.size()); diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java new file mode 100644 index 000000000..001dfccf6 --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java @@ -0,0 +1,18 @@ +package com.linkedin.openhouse.scheduler; + +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableStats; +import lombok.Value; + +/** + * A pending operation paired with the stats the bin packer will use as its cost source. Built by + * the scheduler at scheduling time and handed to the {@link BinPacker} as the unit of packing. + * + *

{@link #stats} may be {@code null} when the table has no stats row yet — the packer treats + * that as zero cost rather than dropping the candidate. + */ +@Value +public class SchedulingCandidate { + TableOperation operation; + TableStats stats; +} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java index ce79a7bcd..964a4f815 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java @@ -5,9 +5,7 @@ import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableStats; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.UUID; import java.util.stream.Collectors; import org.junit.jupiter.api.Test; @@ -28,56 +26,47 @@ private static TableOperation op(String uuid) { .build(); } - private static TableStats stats(String uuid, Long fileCount) { + private static TableStats stats(Long fileCount) { return TableStats.builder() - .tableUuid(uuid) .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(fileCount).build()) .build(); } - private static Map statsMap(TableStats... entries) { - Map m = new HashMap<>(); - for (TableStats s : entries) { - m.put(s.getTableUuid(), s); - } - return m; + private static SchedulingCandidate candidate(String uuid, Long fileCount) { + return new SchedulingCandidate(op(uuid), stats(fileCount)); + } + + private static SchedulingCandidate candidateWithoutStats(String uuid) { + return new SchedulingCandidate(op(uuid), null); } @Test void emptyInput_returnsEmptyBins() { - assertThat(packer.pack(List.of(), Map.of())).isEmpty(); + assertThat(packer.pack(List.of())).isEmpty(); } @Test void singleTable_oneBin() { - TableOperation o = op("uuid-1"); - List bins = packer.pack(List.of(o), statsMap(stats("uuid-1", 100L))); + SchedulingCandidate c = candidate("uuid-1", 100L); + List bins = packer.pack(List.of(c)); assertThat(bins).hasSize(1); - assertThat(bins.get(0).getOperations()).containsExactly(o); + assertThat(bins.get(0).getOperations()).containsExactly(c.getOperation()); } @Test void tablesUnderLimit_oneBin() { - TableOperation a = op("a"); - TableOperation b = op("b"); - TableOperation c = op("c"); List bins = packer.pack( - List.of(a, b, c), - statsMap(stats("a", 300_000L), stats("b", 300_000L), stats("c", 300_000L))); + List.of(candidate("a", 300_000L), candidate("b", 300_000L), candidate("c", 300_000L))); assertThat(bins).hasSize(1); assertThat(bins.get(0).getOperations()).hasSize(3); } @Test void tablesOverLimit_twoBins() { - TableOperation a = op("a"); - TableOperation b = op("b"); - TableOperation c = op("c"); List bins = packer.pack( - List.of(a, b, c), - statsMap(stats("a", 600_000L), stats("b", 600_000L), stats("c", 400_000L))); + List.of(candidate("a", 600_000L), candidate("b", 600_000L), candidate("c", 400_000L))); assertThat(bins).hasSize(2); assertThat(bins.get(0).getOperations()).hasSize(2); // 600k + 400k assertThat(bins.get(1).getOperations()).hasSize(1); // 600k alone @@ -85,37 +74,31 @@ void tablesOverLimit_twoBins() { @Test void largeTableAlone_exceedsLimitSingleBin() { - TableOperation big = op("big"); - List bins = packer.pack(List.of(big), statsMap(stats("big", 5_000_000L))); + SchedulingCandidate big = candidate("big", 5_000_000L); + List bins = packer.pack(List.of(big)); assertThat(bins).hasSize(1); - assertThat(bins.get(0).getOperations()).containsExactly(big); + assertThat(bins.get(0).getOperations()).containsExactly(big.getOperation()); } @Test - void missingStats_treatedAsZero() { - TableOperation x = op("x"); - TableOperation y = op("y"); - List bins = packer.pack(List.of(x, y), Map.of()); + void nullStats_treatedAsZero() { + List bins = packer.pack(List.of(candidateWithoutStats("x"), candidateWithoutStats("y"))); assertThat(bins).hasSize(1); assertThat(bins.get(0).getOperations()).hasSize(2); } @Test void nullFileCount_treatedAsZero() { - TableOperation x = op("x"); - TableOperation y = op("y"); - List bins = packer.pack(List.of(x, y), statsMap(stats("x", null), stats("y", null))); + List bins = packer.pack(List.of(candidate("x", null), candidate("y", null))); assertThat(bins).hasSize(1); assertThat(bins.get(0).getOperations()).hasSize(2); } @Test void sortedDescending_largestFirst() { - TableOperation small = op("small"); - TableOperation large = op("large"); - List bins = - packer.pack( - List.of(small, large), statsMap(stats("small", 100L), stats("large", 900_000L))); + SchedulingCandidate small = candidate("small", 100L); + SchedulingCandidate large = candidate("large", 900_000L); + List bins = packer.pack(List.of(small, large)); assertThat(bins).hasSize(1); List ordered = bins.get(0).getOperations().stream() @@ -126,8 +109,7 @@ void sortedDescending_largestFirst() { @Test void binCarriesOperationType() { - TableOperation o = op("u"); - List bins = packer.pack(List.of(o), statsMap(stats("u", 1L))); + List bins = packer.pack(List.of(candidate("u", 1L))); assertThat(bins.get(0).getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 245dd902a..d4ff3450e 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -3,7 +3,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyList; -import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.never; @@ -14,7 +13,6 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -80,7 +78,7 @@ void schedule_noPendingOps_noJobSubmitted() { runner.schedule(OFD); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); - verify(binPacker, never()).pack(anyList(), anyMap()); + verify(binPacker, never()).pack(anyList()); } @Test @@ -102,8 +100,15 @@ void schedule_singleBin_claimsAndMarksScheduled() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); - when(binPacker.pack(anyList(), anyMap())) - .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); + when(binPacker.pack(anyList())) + .thenAnswer( + inv -> + List.of( + new Bin( + OFD, + inv.>getArgument(0).stream() + .map(SchedulingCandidate::getOperation) + .collect(java.util.stream.Collectors.toList())))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) @@ -131,8 +136,15 @@ void schedule_jobLaunchFails_marksPendingForRetry() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList(), anyMap())) - .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); + when(binPacker.pack(anyList())) + .thenAnswer( + inv -> + List.of( + new Bin( + OFD, + inv.>getArgument(0).stream() + .map(SchedulingCandidate::getOperation) + .collect(java.util.stream.Collectors.toList())))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.empty()); @@ -152,8 +164,15 @@ void schedule_rowsAlreadyClaimed_skipsSubmit() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList(), anyMap())) - .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); + when(binPacker.pack(anyList())) + .thenAnswer( + inv -> + List.of( + new Bin( + OFD, + inv.>getArgument(0).stream() + .map(SchedulingCandidate::getOperation) + .collect(java.util.stream.Collectors.toList())))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(0); runner.schedule(OFD); @@ -171,8 +190,15 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row1, row2)); when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList(), anyMap())) - .thenAnswer(inv -> List.of(new Bin(OFD, inv.>getArgument(0)))); + when(binPacker.pack(anyList())) + .thenAnswer( + inv -> + List.of( + new Bin( + OFD, + inv.>getArgument(0).stream() + .map(SchedulingCandidate::getOperation) + .collect(java.util.stream.Collectors.toList())))); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(2); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(2); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) From 9e5fbae8649526094bf20e2df824ce53a4fee796 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 13:20:29 -0700 Subject: [PATCH 09/19] style(scheduler): convert FileCountBinPacker for-loops to functional patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cost-map building is now a stream collect. The bin-assignment search uses IntStream.range().filter().findFirst() instead of a counted for loop. The outer iteration over sorted operations is sorted.forEach(...). First-fit-descending bin packing still maintains mutable running-totals state across placements (each placement depends on prior bin totals) — that's delegated to a small placeInBin helper rather than inlined. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../scheduler/FileCountBinPacker.java | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index e4bc5431a..2a7295e7b 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -5,10 +5,11 @@ import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.OptionalInt; import java.util.stream.Collectors; +import java.util.stream.IntStream; import lombok.RequiredArgsConstructor; /** @@ -33,10 +34,9 @@ public List pack(List pending) { } // Project once: each candidate's packing cost is just a long, keyed by operation id. - Map costByOperationId = new HashMap<>(); - for (SchedulingCandidate c : pending) { - costByOperationId.put(c.getOperation().getId(), cost(c.getStats())); - } + Map costByOperationId = + pending.stream() + .collect(Collectors.toMap(c -> c.getOperation().getId(), c -> cost(c.getStats()))); List sorted = pending.stream() @@ -46,34 +46,36 @@ public List pack(List pending) { .reversed()) .collect(Collectors.toList()); + // First-fit-descending is inherently stateful — each placement depends on running totals + // for the bins assembled so far. Two parallel lists carry that state; sorted.forEach mutates + // them via the helper. List> binContents = new ArrayList<>(); List binTotals = new ArrayList<>(); - - for (TableOperation op : sorted) { - long cost = costByOperationId.get(op.getId()); - int placed = -1; - for (int i = 0; i < binContents.size(); i++) { - if (binTotals.get(i) + cost <= maxFilesPerBin || binTotals.get(i) == 0) { - placed = i; - break; - } - } - if (placed >= 0) { - binContents.get(placed).add(op); - binTotals.set(placed, binTotals.get(placed) + cost); - } else { - List newBin = new ArrayList<>(); - newBin.add(op); - binContents.add(newBin); - binTotals.add(cost); - } - } + sorted.forEach(op -> placeInBin(op, costByOperationId.get(op.getId()), binContents, binTotals)); return binContents.stream() .map(ops -> new Bin(operationType, ops)) .collect(Collectors.toList()); } + private void placeInBin( + TableOperation op, long cost, List> bins, List totals) { + OptionalInt placed = + IntStream.range(0, bins.size()) + .filter(i -> totals.get(i) + cost <= maxFilesPerBin || totals.get(i) == 0) + .findFirst(); + if (placed.isPresent()) { + int idx = placed.getAsInt(); + bins.get(idx).add(op); + totals.set(idx, totals.get(idx) + cost); + } else { + List newBin = new ArrayList<>(); + newBin.add(op); + bins.add(newBin); + totals.add(cost); + } + } + private static long cost(TableStats stats) { if (stats == null || stats.getSnapshot() == null) { return 0L; From 7dd1e972c7a401c487445ca511d60c7b0292b398 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 13:30:53 -0700 Subject: [PATCH 10/19] refactor(scheduler): final resultsEndpoint, throw on missing packer, drop null guards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SchedulerRunner: - resultsEndpoint moves to constructor injection (final field); explicit constructor replaces @RequiredArgsConstructor so @Value can decorate the param. - Missing-packer is now IllegalStateException — silent skip masked miswiring. - Stats lookup site carries a trade-off comment: per-cycle fetch trades a batched query against denormalizing onto TableOperation (fresher data and smaller operations vs. one fewer query). - Ops without a table_stats row are filtered at the boundary and logged; SchedulingCandidate.stats is guaranteed non-null at the BinPacker call. SchedulingCandidate.operation and .stats are @NonNull (Lombok-enforced). FileCountBinPacker: placeInBin is inlined into sorted.forEach; cost() no longer guards stats or snapshot for null — those are now invariants. Tests updated: new constructor signature, throw assertion for missing packer, two new tests for the stats-filter path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../scheduler/FileCountBinPacker.java | 50 ++++---- .../openhouse/scheduler/SchedulerRunner.java | 51 ++++++-- .../scheduler/SchedulingCandidate.java | 9 +- .../scheduler/FileCountBinPackerTest.java | 11 -- .../scheduler/SchedulerRunnerTest.java | 115 +++++++++++------- 5 files changed, 136 insertions(+), 100 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index 2a7295e7b..9110b0dea 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -13,9 +13,8 @@ import lombok.RequiredArgsConstructor; /** - * Greedy first-fit-descending bin-packer keyed on per-table file count. File count is projected - * from each candidate's {@link TableStats}; candidates whose stats are {@code null} or whose - * snapshot reports no file count are treated as zero. + * Greedy first-fit-descending bin-packer keyed on per-table file count, projected from each + * candidate's {@link TableStats}. * *

Candidates are sorted by descending file count, then assigned to the first bin whose running * total stays at or below {@code maxFilesPerBin}. An operation larger than the limit gets its own @@ -46,40 +45,35 @@ public List pack(List pending) { .reversed()) .collect(Collectors.toList()); - // First-fit-descending is inherently stateful — each placement depends on running totals - // for the bins assembled so far. Two parallel lists carry that state; sorted.forEach mutates - // them via the helper. + // First-fit-descending is inherently stateful — each placement depends on the running totals + // for bins assembled so far. List> binContents = new ArrayList<>(); List binTotals = new ArrayList<>(); - sorted.forEach(op -> placeInBin(op, costByOperationId.get(op.getId()), binContents, binTotals)); + sorted.forEach( + op -> { + long c = costByOperationId.get(op.getId()); + OptionalInt placed = + IntStream.range(0, binContents.size()) + .filter(i -> binTotals.get(i) + c <= maxFilesPerBin || binTotals.get(i) == 0) + .findFirst(); + if (placed.isPresent()) { + int idx = placed.getAsInt(); + binContents.get(idx).add(op); + binTotals.set(idx, binTotals.get(idx) + c); + } else { + List newBin = new ArrayList<>(); + newBin.add(op); + binContents.add(newBin); + binTotals.add(c); + } + }); return binContents.stream() .map(ops -> new Bin(operationType, ops)) .collect(Collectors.toList()); } - private void placeInBin( - TableOperation op, long cost, List> bins, List totals) { - OptionalInt placed = - IntStream.range(0, bins.size()) - .filter(i -> totals.get(i) + cost <= maxFilesPerBin || totals.get(i) == 0) - .findFirst(); - if (placed.isPresent()) { - int idx = placed.getAsInt(); - bins.get(idx).add(op); - totals.set(idx, totals.get(idx) + cost); - } else { - List newBin = new ArrayList<>(); - newBin.add(op); - bins.add(newBin); - totals.add(cost); - } - } - private static long cost(TableStats stats) { - if (stats == null || stats.getSnapshot() == null) { - return 0L; - } Long n = stats.getSnapshot().getNumCurrentFiles(); return n != null ? n : 0L; } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index a43b6dc16..72d2f8763 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -15,7 +15,6 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; @@ -29,16 +28,25 @@ */ @Slf4j @Component -@RequiredArgsConstructor public class SchedulerRunner { - private final TableOperationsRepository operationsRepo; private final TableStatsRepository statsRepo; private final JobsServiceClient jobsClient; private final Map binPackers; - - @Value("${scheduler.results-endpoint}") - private String resultsEndpoint; + private final String resultsEndpoint; + + public SchedulerRunner( + TableOperationsRepository operationsRepo, + TableStatsRepository statsRepo, + JobsServiceClient jobsClient, + Map binPackers, + @Value("${scheduler.results-endpoint}") String resultsEndpoint) { + this.operationsRepo = operationsRepo; + this.statsRepo = statsRepo; + this.jobsClient = jobsClient; + this.binPackers = binPackers; + this.resultsEndpoint = resultsEndpoint; + } /** Schedule all PENDING operations of the given type across all databases. */ public void schedule(OperationType operationType) { @@ -55,8 +63,8 @@ public void schedule( BinPacker packer = binPackers.get(operationType); if (packer == null) { - log.warn("No BinPacker registered for operation type {}; skipping", operationType); - return; + throw new IllegalStateException( + "No BinPacker registered for operation type " + operationType); } List pendingRows = @@ -74,20 +82,43 @@ public void schedule( List pending = pendingRows.stream().map(TableOperation::fromRow).collect(Collectors.toList()); + // Tradeoff: we fetch fresh table_stats per scheduling cycle (one batched query) rather than + // denormalizing the relevant fields onto TableOperation. The denormalized alternative would + // remove the per-cycle lookup but widen the TableOperation row and serve staler data; the + // current shape favors smaller operations + freshness over fewer queries. Set uuids = pending.stream().map(TableOperation::getTableUuid).collect(Collectors.toSet()); Map statsByUuid = statsRepo.findAllById(uuids).stream() .collect(Collectors.toMap(TableStatsRow::getTableUuid, TableStats::fromRow)); - List candidates = + // Filter at the boundary so SchedulingCandidate.stats is guaranteed non-null. A table without + // a stats row gets skipped this cycle and reconsidered after stats land. + List withStats = pending.stream() + .filter(op -> statsByUuid.containsKey(op.getTableUuid())) + .collect(Collectors.toList()); + if (withStats.size() < pending.size()) { + log.warn( + "Skipped {} {} operations with no table_stats row", + pending.size() - withStats.size(), + operationType); + } + if (withStats.isEmpty()) { + return; + } + + List candidates = + withStats.stream() .map(op -> new SchedulingCandidate(op, statsByUuid.get(op.getTableUuid()))) .collect(Collectors.toList()); List bins = packer.pack(candidates); log.info( - "Packed {} PENDING {} operations into {} bins", pending.size(), operationType, bins.size()); + "Packed {} PENDING {} operations into {} bins", + candidates.size(), + operationType, + bins.size()); bins.forEach(this::submitBin); } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java index 001dfccf6..4aadaaabf 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java @@ -2,17 +2,18 @@ import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableStats; +import lombok.NonNull; import lombok.Value; /** * A pending operation paired with the stats the bin packer will use as its cost source. Built by * the scheduler at scheduling time and handed to the {@link BinPacker} as the unit of packing. * - *

{@link #stats} may be {@code null} when the table has no stats row yet — the packer treats - * that as zero cost rather than dropping the candidate. + *

Both fields are non-null. The scheduler filters out operations whose tables have no stats row + * before constructing candidates. */ @Value public class SchedulingCandidate { - TableOperation operation; - TableStats stats; + @NonNull TableOperation operation; + @NonNull TableStats stats; } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java index 964a4f815..7185494b5 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java @@ -36,10 +36,6 @@ private static SchedulingCandidate candidate(String uuid, Long fileCount) { return new SchedulingCandidate(op(uuid), stats(fileCount)); } - private static SchedulingCandidate candidateWithoutStats(String uuid) { - return new SchedulingCandidate(op(uuid), null); - } - @Test void emptyInput_returnsEmptyBins() { assertThat(packer.pack(List.of())).isEmpty(); @@ -80,13 +76,6 @@ void largeTableAlone_exceedsLimitSingleBin() { assertThat(bins.get(0).getOperations()).containsExactly(big.getOperation()); } - @Test - void nullStats_treatedAsZero() { - List bins = packer.pack(List.of(candidateWithoutStats("x"), candidateWithoutStats("y"))); - assertThat(bins).hasSize(1); - assertThat(bins.get(0).getOperations()).hasSize(2); - } - @Test void nullFileCount_treatedAsZero() { List bins = packer.pack(List.of(candidate("x", null), candidate("y", null))); diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index d4ff3450e..b50ae45d2 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -1,6 +1,7 @@ package com.linkedin.openhouse.scheduler; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyString; @@ -20,13 +21,13 @@ import java.util.Map; import java.util.Optional; import java.util.UUID; +import java.util.stream.Collectors; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import org.springframework.test.util.ReflectionTestUtils; @ExtendWith(MockitoExtension.class) class SchedulerRunnerTest { @@ -37,6 +38,7 @@ class SchedulerRunnerTest { private static final com.linkedin.openhouse.optimizer.db.OperationStatus PENDING_DB = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING; private static final String OFD_STR = OFD.name(); + private static final String RESULTS_ENDPOINT = "http://localhost:8080/v1/table-operations"; @Mock private TableOperationsRepository operationsRepo; @Mock private TableStatsRepository statsRepo; @@ -47,9 +49,9 @@ class SchedulerRunnerTest { @BeforeEach void setUp() { - runner = new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker)); - ReflectionTestUtils.setField( - runner, "resultsEndpoint", "http://localhost:8080/v1/table-operations"); + runner = + new SchedulerRunner( + operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker), RESULTS_ENDPOINT); } private TableOperationsRow pendingRow(String uuid, String db, String table) { @@ -71,6 +73,19 @@ private TableStatsRow statsRow(String uuid, long numCurrentFiles) { .build(); } + /** Stubs the bin packer to return one bin containing every candidate. */ + private void stubOneBinForAllCandidates() { + when(binPacker.pack(anyList())) + .thenAnswer( + inv -> + List.of( + new Bin( + OFD, + inv.>getArgument(0).stream() + .map(SchedulingCandidate::getOperation) + .collect(Collectors.toList())))); + } + @Test void schedule_noPendingOps_noJobSubmitted() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of()); @@ -82,12 +97,13 @@ void schedule_noPendingOps_noJobSubmitted() { } @Test - void schedule_unknownOperationType_noOp() { + void schedule_unknownOperationType_throws() { SchedulerRunner emptyRunner = - new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of()); - ReflectionTestUtils.setField(emptyRunner, "resultsEndpoint", "x"); + new SchedulerRunner(operationsRepo, statsRepo, jobsClient, Map.of(), RESULTS_ENDPOINT); - emptyRunner.schedule(OFD); + assertThatThrownBy(() -> emptyRunner.schedule(OFD)) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("No BinPacker registered"); verify(operationsRepo, never()).find(any(), any(), any(), any(), any()); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); @@ -100,15 +116,7 @@ void schedule_singleBin_claimsAndMarksScheduled() { when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); - when(binPacker.pack(anyList())) - .thenAnswer( - inv -> - List.of( - new Bin( - OFD, - inv.>getArgument(0).stream() - .map(SchedulingCandidate::getOperation) - .collect(java.util.stream.Collectors.toList())))); + stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) @@ -135,16 +143,8 @@ void schedule_jobLaunchFails_marksPendingForRetry() { TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); - when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList())) - .thenAnswer( - inv -> - List.of( - new Bin( - OFD, - inv.>getArgument(0).stream() - .map(SchedulingCandidate::getOperation) - .collect(java.util.stream.Collectors.toList())))); + when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); + stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.empty()); @@ -163,16 +163,8 @@ void schedule_rowsAlreadyClaimed_skipsSubmit() { TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); - when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList())) - .thenAnswer( - inv -> - List.of( - new Bin( - OFD, - inv.>getArgument(0).stream() - .map(SchedulingCandidate::getOperation) - .collect(java.util.stream.Collectors.toList())))); + when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); + stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(0); runner.schedule(OFD); @@ -189,16 +181,8 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { TableOperationsRow row2 = pendingRow(uuid, "db1", "tbl1"); when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row1, row2)); - when(statsRepo.findAllById(any())).thenReturn(List.of()); - when(binPacker.pack(anyList())) - .thenAnswer( - inv -> - List.of( - new Bin( - OFD, - inv.>getArgument(0).stream() - .map(SchedulingCandidate::getOperation) - .collect(java.util.stream.Collectors.toList())))); + when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); + stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(2); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(2); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) @@ -208,4 +192,41 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { verify(operationsRepo).cancelDuplicatePendingBatch(eq(OFD_DB), anyList()); } + + @Test + void schedule_opsWithoutStats_skipped() { + String withStats = UUID.randomUUID().toString(); + String missing = UUID.randomUUID().toString(); + TableOperationsRow withStatsRow = pendingRow(withStats, "db1", "tblA"); + TableOperationsRow missingRow = pendingRow(missing, "db1", "tblB"); + + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)) + .thenReturn(List.of(withStatsRow, missingRow)); + when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(withStats, 50L))); + stubOneBinForAllCandidates(); + when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); + when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); + when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) + .thenReturn(Optional.of("job-skip")); + + runner.schedule(OFD); + + // Only the op with a stats row makes it into the claim batch. + ArgumentCaptor> ids = ArgumentCaptor.forClass(List.class); + verify(operationsRepo).markSchedulingBatch(ids.capture(), any()); + assertThat(ids.getValue()).containsExactly(withStatsRow.getId()); + } + + @Test + void schedule_allOpsWithoutStats_noJobSubmitted() { + TableOperationsRow row = pendingRow(UUID.randomUUID().toString(), "db1", "tbl1"); + + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); + when(statsRepo.findAllById(any())).thenReturn(List.of()); + + runner.schedule(OFD); + + verify(binPacker, never()).pack(anyList()); + verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); + } } From c73d8cb00d5f37ab2159690eac4fe2b68646d66f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 14:47:58 -0700 Subject: [PATCH 11/19] fix(scheduler): only launch + mark SCHEDULED the ops actually claimed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: markSchedulingBatch returns an aggregate count, not the per-id outcome. If another instance has already claimed some of the bin's ids (or those rows no longer exist), the old code still launched the Spark job for the full bin and marked every id SCHEDULED — clobbering rows owned by another instance. Fix: after the SCHEDULING CAS, call the new findClaimedIds(ids, scheduledAt) to determine which ids this instance actually owns, subset the bin to that list, launch only the claimed subset, and mark only those SCHEDULED (or revert only those to PENDING on launch failure). - New Bin.subset(Collection) for the bin-narrowing step. - SchedulerRunner.submitBin uses claimedIds for all downstream calls. - New SchedulerRunnerTest.schedule_partialClaim_launchesAndMarksOnlyClaimedSubset covers the bug case; existing tests stub findClaimedIds to match the full-claim happy path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/linkedin/openhouse/scheduler/Bin.java | 14 ++++++ .../openhouse/scheduler/SchedulerRunner.java | 26 ++++++++--- .../scheduler/SchedulerRunnerTest.java | 45 +++++++++++++++++++ 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java index 96c480eba..f61315bfb 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java @@ -4,8 +4,11 @@ import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; import java.time.Instant; +import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -34,6 +37,17 @@ public List getTableNames() { .collect(Collectors.toList()); } + /** + * Return a new {@link Bin} containing only the operations whose IDs are in {@code keepIds}. Used + * by the scheduler to narrow the bin to the rows it actually claimed before launching the job. + */ + public Bin subset(Collection keepIds) { + Set keep = new HashSet<>(keepIds); + List filtered = + operations.stream().filter(op -> keep.contains(op.getId())).collect(Collectors.toList()); + return new Bin(operationType, filtered); + } + /** * Submit this bin as a single Spark job. Returns the job id on success, or empty on submission * failure — the caller is responsible for the surrounding status updates. diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 72d2f8763..124633aa3 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -129,23 +129,35 @@ private void submitBin(Bin bin) { // Deduplicate PENDING rows per tableUuid for this op type, keeping the IDs in this bin. operationsRepo.cancelDuplicatePendingBatch(bin.getOperationType().toDb(), ids); - // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. - int claimedCount = operationsRepo.markSchedulingBatch(ids, Instant.now()); - if (claimedCount == 0) { + // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. The UPDATE's row count is just + // an aggregate — to know *which* rows we own, re-query for SCHEDULING rows tagged with our + // scheduledAt watermark. Anything not in that subset belongs to another instance or was + // canceled, and must not be submitted or marked SCHEDULED. + Instant claimedAt = Instant.now(); + operationsRepo.markSchedulingBatch(ids, claimedAt); + List claimedIds = operationsRepo.findClaimedIds(ids, claimedAt); + if (claimedIds.isEmpty()) { log.info("All rows in bin already claimed by another scheduler instance; skipping"); return; } + if (claimedIds.size() < ids.size()) { + log.info( + "Partial claim: {} of {} ops in bin claimed; launching job for claimed subset only", + claimedIds.size(), + ids.size()); + } - Optional jobId = bin.schedule(jobsClient, resultsEndpoint); + Bin claimedBin = bin.subset(claimedIds); + Optional jobId = claimedBin.schedule(jobsClient, resultsEndpoint); if (jobId.isPresent()) { - int updated = operationsRepo.markScheduledBatch(ids, jobId.get()); + int updated = operationsRepo.markScheduledBatch(claimedIds, jobId.get()); log.info( "Submitted job {} for {} tables ({} rows marked SCHEDULED)", jobId.get(), - bin.getOperations().size(), + claimedBin.getOperations().size(), updated); } else { - int reverted = operationsRepo.markPendingBatch(ids); + int reverted = operationsRepo.markPendingBatch(claimedIds); log.warn( "Job submission failed; reverted {} claimed rows back to PENDING for retry on the next" + " pass", diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index b50ae45d2..7722a6156 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -118,6 +118,7 @@ void schedule_singleBin_claimsAndMarksScheduled() { when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); + when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(row.getId())); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-123")); @@ -146,6 +147,7 @@ void schedule_jobLaunchFails_marksPendingForRetry() { when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); + when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(row.getId())); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.empty()); when(operationsRepo.markPendingBatch(anyList())).thenReturn(1); @@ -166,6 +168,7 @@ void schedule_rowsAlreadyClaimed_skipsSubmit() { when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(0); + when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of()); runner.schedule(OFD); @@ -184,6 +187,8 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(2); + when(operationsRepo.findClaimedIds(anyList(), any())) + .thenReturn(List.of(row1.getId(), row2.getId())); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(2); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-789")); @@ -193,6 +198,45 @@ void schedule_cancelsDuplicatePendingBeforeClaim() { verify(operationsRepo).cancelDuplicatePendingBatch(eq(OFD_DB), anyList()); } + @Test + void schedule_partialClaim_launchesAndMarksOnlyClaimedSubset() { + String uuidA = UUID.randomUUID().toString(); + String uuidB = UUID.randomUUID().toString(); + TableOperationsRow rowA = pendingRow(uuidA, "db1", "tblA"); + TableOperationsRow rowB = pendingRow(uuidB, "db1", "tblB"); + + when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(rowA, rowB)); + when(statsRepo.findAllById(any())) + .thenReturn(List.of(statsRow(uuidA, 100L), statsRow(uuidB, 100L))); + stubOneBinForAllCandidates(); + // We submitted ids [A, B] to mark; only A was actually claimed (B owned by another instance). + when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); + when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(rowA.getId())); + when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); + when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) + .thenReturn(Optional.of("job-partial")); + + runner.schedule(OFD); + + // Job is launched for the claimed subset only. + ArgumentCaptor> launchedTableNames = ArgumentCaptor.forClass(List.class); + ArgumentCaptor> launchedOpIds = ArgumentCaptor.forClass(List.class); + verify(jobsClient) + .launch( + anyString(), + anyString(), + launchedTableNames.capture(), + launchedOpIds.capture(), + anyString()); + assertThat(launchedTableNames.getValue()).containsExactly("db1.tblA"); + assertThat(launchedOpIds.getValue()).containsExactly(rowA.getId()); + + // markScheduledBatch is called only with the claimed id, not the unclaimed one. + verify(operationsRepo).markScheduledBatch(eq(List.of(rowA.getId())), eq("job-partial")); + verify(operationsRepo, never()).markScheduledBatch(eq(List.of(rowB.getId())), anyString()); + verify(operationsRepo, never()).markPendingBatch(anyList()); + } + @Test void schedule_opsWithoutStats_skipped() { String withStats = UUID.randomUUID().toString(); @@ -205,6 +249,7 @@ void schedule_opsWithoutStats_skipped() { when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(withStats, 50L))); stubOneBinForAllCandidates(); when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); + when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(withStatsRow.getId())); when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-skip")); From ad1bf4c9491a9b44e6d7fda53fc59ab297f6d6b4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 15:05:24 -0700 Subject: [PATCH 12/19] feat(scheduler): add SingletonBinPacker; register for SNAPSHOT_EXPIRATION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SingletonBinPacker emits one Bin per candidate — used for operation types where batching a single Spark job across multiple tables is not safe or not worth the orchestration overhead. Snapshot expiration is the first consumer. Wiring lives entirely in SchedulerConfig.binPackers; the runner, the Bin abstraction, and the per-table stats lookup all work for the new type with no change. SNAPSHOT_EXPIRATION is added to model.OperationType and db.OperationType on this PR only — the api wire enum is unchanged. When the upstream layers adopt the new operation type, the duplicate enum constants will reconcile. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../scheduler/SingletonBinPacker.java | 27 ++++++++ .../scheduler/config/SchedulerConfig.java | 5 +- .../scheduler/SingletonBinPackerTest.java | 62 +++++++++++++++++++ .../openhouse/optimizer/db/OperationType.java | 5 +- .../optimizer/model/OperationType.java | 5 +- 5 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java create mode 100644 apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java new file mode 100644 index 000000000..7b03b1ef4 --- /dev/null +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java @@ -0,0 +1,27 @@ +package com.linkedin.openhouse.scheduler; + +import com.linkedin.openhouse.optimizer.model.OperationType; +import java.util.List; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; + +/** + * Bin packer that emits one bin per operation — no batching. Suitable for operation types whose + * Spark jobs aren't safe (or worth) running on multiple tables in a single submission, e.g. + * snapshot expiration. + * + *

Ignores the {@link com.linkedin.openhouse.optimizer.model.TableStats} attached to each + * candidate: cost has no effect on the packing decision because every operation is its own bin. + */ +@RequiredArgsConstructor +public class SingletonBinPacker implements BinPacker { + + private final OperationType operationType; + + @Override + public List pack(List pending) { + return pending.stream() + .map(c -> new Bin(operationType, List.of(c.getOperation()))) + .collect(Collectors.toList()); + } +} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java index 56799f433..4c6f7163f 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java @@ -3,6 +3,7 @@ import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.scheduler.BinPacker; import com.linkedin.openhouse.scheduler.FileCountBinPacker; +import com.linkedin.openhouse.scheduler.SingletonBinPacker; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; import java.util.Map; import org.springframework.beans.factory.annotation.Value; @@ -41,6 +42,8 @@ public JobsServiceClient jobsServiceClient(WebClient jobsWebClient) { public Map binPackers() { return Map.of( OperationType.ORPHAN_FILES_DELETION, - new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin)); + new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin), + OperationType.SNAPSHOT_EXPIRATION, + new SingletonBinPacker(OperationType.SNAPSHOT_EXPIRATION)); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java new file mode 100644 index 000000000..7a91120d5 --- /dev/null +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java @@ -0,0 +1,62 @@ +package com.linkedin.openhouse.scheduler; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +class SingletonBinPackerTest { + + private final SingletonBinPacker packer = + new SingletonBinPacker(OperationType.SNAPSHOT_EXPIRATION); + + private static SchedulingCandidate candidate(String uuid) { + TableOperation op = + TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid) + .databaseName("db") + .tableName("tbl_" + uuid) + .operationType(OperationType.SNAPSHOT_EXPIRATION) + .build(); + TableStats stats = + TableStats.builder() + .tableUuid(uuid) + .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(0L).build()) + .build(); + return new SchedulingCandidate(op, stats); + } + + @Test + void emptyInput_returnsEmptyBins() { + assertThat(packer.pack(List.of())).isEmpty(); + } + + @Test + void everyCandidateGetsItsOwnBin() { + SchedulingCandidate a = candidate("a"); + SchedulingCandidate b = candidate("b"); + SchedulingCandidate c = candidate("c"); + + List bins = packer.pack(List.of(a, b, c)); + + assertThat(bins).hasSize(3); + bins.forEach(bin -> assertThat(bin.getOperations()).hasSize(1)); + List uuids = + bins.stream() + .map(bin -> bin.getOperations().get(0).getTableUuid()) + .collect(Collectors.toList()); + assertThat(uuids).containsExactly("a", "b", "c"); + } + + @Test + void binsCarryConfiguredOperationType() { + List bins = packer.pack(List.of(candidate("u"))); + assertThat(bins.get(0).getOperationType()).isEqualTo(OperationType.SNAPSHOT_EXPIRATION); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java index e4caf549b..c725121bd 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -10,5 +10,8 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION, + + /** Expires old Iceberg snapshots past the table's retention policy. */ + SNAPSHOT_EXPIRATION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index 13c7e9c61..7230bff41 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -8,7 +8,10 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION; + ORPHAN_FILES_DELETION, + + /** Expires old Iceberg snapshots past the table's retention policy. */ + SNAPSHOT_EXPIRATION; /** Convert to the DB-layer counterpart. */ public com.linkedin.openhouse.optimizer.db.OperationType toDb() { From d9ffe4872a92f0da0907ecd070782b77374e5e1a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 15:12:02 -0700 Subject: [PATCH 13/19] docs(scheduler): strip editorial commentary from SingletonBinPacker javadoc Removed the "suitable for ... e.g. snapshot expiration" framing; the javadoc now states only what the class does. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../linkedin/openhouse/scheduler/SingletonBinPacker.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java index 7b03b1ef4..9f8b202ad 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java @@ -6,12 +6,10 @@ import lombok.RequiredArgsConstructor; /** - * Bin packer that emits one bin per operation — no batching. Suitable for operation types whose - * Spark jobs aren't safe (or worth) running on multiple tables in a single submission, e.g. - * snapshot expiration. + * Bin packer that emits one bin per operation. * *

Ignores the {@link com.linkedin.openhouse.optimizer.model.TableStats} attached to each - * candidate: cost has no effect on the packing decision because every operation is its own bin. + * candidate. */ @RequiredArgsConstructor public class SingletonBinPacker implements BinPacker { From ad88893c3554fa7f8db711c62858113adc67e1ba Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 16:17:40 -0700 Subject: [PATCH 14/19] Revert "docs(scheduler): strip editorial commentary from SingletonBinPacker javadoc" This reverts commit d9ffe4872a92f0da0907ecd070782b77374e5e1a. --- .../linkedin/openhouse/scheduler/SingletonBinPacker.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java index 9f8b202ad..7b03b1ef4 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java @@ -6,10 +6,12 @@ import lombok.RequiredArgsConstructor; /** - * Bin packer that emits one bin per operation. + * Bin packer that emits one bin per operation — no batching. Suitable for operation types whose + * Spark jobs aren't safe (or worth) running on multiple tables in a single submission, e.g. + * snapshot expiration. * *

Ignores the {@link com.linkedin.openhouse.optimizer.model.TableStats} attached to each - * candidate. + * candidate: cost has no effect on the packing decision because every operation is its own bin. */ @RequiredArgsConstructor public class SingletonBinPacker implements BinPacker { From ac7f84de5d47d66aa20f5a05dede9f0df1fd2f4f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 16:17:40 -0700 Subject: [PATCH 15/19] Revert "feat(scheduler): add SingletonBinPacker; register for SNAPSHOT_EXPIRATION" This reverts commit ad1bf4c9491a9b44e6d7fda53fc59ab297f6d6b4. --- .../scheduler/SingletonBinPacker.java | 27 -------- .../scheduler/config/SchedulerConfig.java | 5 +- .../scheduler/SingletonBinPackerTest.java | 62 ------------------- .../openhouse/optimizer/db/OperationType.java | 5 +- .../optimizer/model/OperationType.java | 5 +- 5 files changed, 3 insertions(+), 101 deletions(-) delete mode 100644 apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java delete mode 100644 apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java deleted file mode 100644 index 7b03b1ef4..000000000 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SingletonBinPacker.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.linkedin.openhouse.scheduler; - -import com.linkedin.openhouse.optimizer.model.OperationType; -import java.util.List; -import java.util.stream.Collectors; -import lombok.RequiredArgsConstructor; - -/** - * Bin packer that emits one bin per operation — no batching. Suitable for operation types whose - * Spark jobs aren't safe (or worth) running on multiple tables in a single submission, e.g. - * snapshot expiration. - * - *

Ignores the {@link com.linkedin.openhouse.optimizer.model.TableStats} attached to each - * candidate: cost has no effect on the packing decision because every operation is its own bin. - */ -@RequiredArgsConstructor -public class SingletonBinPacker implements BinPacker { - - private final OperationType operationType; - - @Override - public List pack(List pending) { - return pending.stream() - .map(c -> new Bin(operationType, List.of(c.getOperation()))) - .collect(Collectors.toList()); - } -} diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java index 4c6f7163f..56799f433 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.scheduler.BinPacker; import com.linkedin.openhouse.scheduler.FileCountBinPacker; -import com.linkedin.openhouse.scheduler.SingletonBinPacker; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; import java.util.Map; import org.springframework.beans.factory.annotation.Value; @@ -42,8 +41,6 @@ public JobsServiceClient jobsServiceClient(WebClient jobsWebClient) { public Map binPackers() { return Map.of( OperationType.ORPHAN_FILES_DELETION, - new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin), - OperationType.SNAPSHOT_EXPIRATION, - new SingletonBinPacker(OperationType.SNAPSHOT_EXPIRATION)); + new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin)); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java deleted file mode 100644 index 7a91120d5..000000000 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SingletonBinPackerTest.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.linkedin.openhouse.scheduler; - -import static org.assertj.core.api.Assertions.assertThat; - -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableStats; -import java.util.List; -import java.util.UUID; -import java.util.stream.Collectors; -import org.junit.jupiter.api.Test; - -class SingletonBinPackerTest { - - private final SingletonBinPacker packer = - new SingletonBinPacker(OperationType.SNAPSHOT_EXPIRATION); - - private static SchedulingCandidate candidate(String uuid) { - TableOperation op = - TableOperation.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(uuid) - .databaseName("db") - .tableName("tbl_" + uuid) - .operationType(OperationType.SNAPSHOT_EXPIRATION) - .build(); - TableStats stats = - TableStats.builder() - .tableUuid(uuid) - .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(0L).build()) - .build(); - return new SchedulingCandidate(op, stats); - } - - @Test - void emptyInput_returnsEmptyBins() { - assertThat(packer.pack(List.of())).isEmpty(); - } - - @Test - void everyCandidateGetsItsOwnBin() { - SchedulingCandidate a = candidate("a"); - SchedulingCandidate b = candidate("b"); - SchedulingCandidate c = candidate("c"); - - List bins = packer.pack(List.of(a, b, c)); - - assertThat(bins).hasSize(3); - bins.forEach(bin -> assertThat(bin.getOperations()).hasSize(1)); - List uuids = - bins.stream() - .map(bin -> bin.getOperations().get(0).getTableUuid()) - .collect(Collectors.toList()); - assertThat(uuids).containsExactly("a", "b", "c"); - } - - @Test - void binsCarryConfiguredOperationType() { - List bins = packer.pack(List.of(candidate("u"))); - assertThat(bins.get(0).getOperationType()).isEqualTo(OperationType.SNAPSHOT_EXPIRATION); - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java index c725121bd..e4caf549b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -10,8 +10,5 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION, - - /** Expires old Iceberg snapshots past the table's retention policy. */ - SNAPSHOT_EXPIRATION + ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index 7230bff41..13c7e9c61 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -8,10 +8,7 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION, - - /** Expires old Iceberg snapshots past the table's retention policy. */ - SNAPSHOT_EXPIRATION; + ORPHAN_FILES_DELETION; /** Convert to the DB-layer counterpart. */ public com.linkedin.openhouse.optimizer.db.OperationType toDb() { From 0b87381152ead85e7ee7cb5f5b791bbb9fdca5ef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 09:40:23 -0700 Subject: [PATCH 16/19] fix(scheduler): point @EntityScan at the actual db package SchedulerApplication scanned com.linkedin.openhouse.optimizer.entity for JPA-managed entities, but the entities live in com.linkedin.openhouse.optimizer.db. The app crashed at startup against a real database (MySQL backend in docker) with "Not a managed type: class ...db.TableStatsRow". Unit tests didn't catch it because they configure JPA differently. Co-Authored-By: Claude Opus 4.7 --- .../com/linkedin/openhouse/scheduler/SchedulerApplication.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java index b28d1b73c..0b3b836d3 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java @@ -11,7 +11,7 @@ /** Entry point for the Optimizer Scheduler application. */ @SpringBootApplication -@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.db") @EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") public class SchedulerApplication { From f6c46742334b7a4956caccd03c9673db4785d608 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:59:45 -0700 Subject: [PATCH 17/19] refactor(scheduler): update model refs after Dto suffix swap Adds Dto suffix to model type refs in the scheduler app: TableOperationDto, TableStatsDto, OperationTypeDto, etc. Also fixes one perl-overreach: SchedulerRunner.PENDING was the db.OperationStatus enum value (not model), restored to OperationStatus. Mechanical follow-up to the opt-0 rename (b31decf8). Co-Authored-By: Claude Opus 4.7 --- .../com/linkedin/openhouse/scheduler/Bin.java | 12 ++++---- .../openhouse/scheduler/BinPacker.java | 4 +-- .../scheduler/FileCountBinPacker.java | 21 ++++++------- .../scheduler/SchedulerApplication.java | 5 ++-- .../openhouse/scheduler/SchedulerRunner.java | 30 +++++++++---------- .../scheduler/SchedulingCandidate.java | 8 ++--- .../scheduler/config/SchedulerConfig.java | 10 +++---- .../scheduler/FileCountBinPackerTest.java | 24 +++++++-------- .../scheduler/SchedulerRunnerTest.java | 4 +-- 9 files changed, 60 insertions(+), 58 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java index f61315bfb..f72a3eaa8 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/Bin.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; import java.time.Instant; import java.util.Collection; @@ -22,12 +22,12 @@ @RequiredArgsConstructor public class Bin { - @Getter private final OperationType operationType; - @Getter private final List operations; + @Getter private final OperationTypeDto operationType; + @Getter private final List operations; /** Operation UUIDs in this bin, parallel to {@link #getTableNames()}. */ public List getOperationIds() { - return operations.stream().map(TableOperation::getId).collect(Collectors.toList()); + return operations.stream().map(TableOperationDto::getId).collect(Collectors.toList()); } /** Fully-qualified {@code database.table} identifiers for the operations in this bin. */ @@ -43,7 +43,7 @@ public List getTableNames() { */ public Bin subset(Collection keepIds) { Set keep = new HashSet<>(keepIds); - List filtered = + List filtered = operations.stream().filter(op -> keep.contains(op.getId())).collect(Collectors.toList()); return new Bin(operationType, filtered); } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java index 90ac2faf3..504c2b910 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/BinPacker.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import java.util.List; /** @@ -9,7 +9,7 @@ * binding to an operation type is the responsibility of the scheduler configuration, not the * strategy class. * - *

{@link TableStats} is the cost source at the interface boundary, carried alongside each + *

{@link TableStatsDto} is the cost source at the interface boundary, carried alongside each * operation in a {@link SchedulingCandidate}. Implementations project the stats down to the minimal * data needed to make their packing decision (e.g. file count for OFD) and do not retain the full * stats payload in the returned bins. diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index 9110b0dea..23e15f1be 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -14,7 +14,7 @@ /** * Greedy first-fit-descending bin-packer keyed on per-table file count, projected from each - * candidate's {@link TableStats}. + * candidate's {@link TableStatsDto}. * *

Candidates are sorted by descending file count, then assigned to the first bin whose running * total stays at or below {@code maxFilesPerBin}. An operation larger than the limit gets its own @@ -23,7 +23,7 @@ @RequiredArgsConstructor public class FileCountBinPacker implements BinPacker { - private final OperationType operationType; + private final OperationTypeDto operationType; private final long maxFilesPerBin; @Override @@ -37,17 +37,18 @@ public List pack(List pending) { pending.stream() .collect(Collectors.toMap(c -> c.getOperation().getId(), c -> cost(c.getStats()))); - List sorted = + List sorted = pending.stream() .map(SchedulingCandidate::getOperation) .sorted( - Comparator.comparingLong((TableOperation op) -> costByOperationId.get(op.getId())) + Comparator.comparingLong( + (TableOperationDto op) -> costByOperationId.get(op.getId())) .reversed()) .collect(Collectors.toList()); // First-fit-descending is inherently stateful — each placement depends on the running totals // for bins assembled so far. - List> binContents = new ArrayList<>(); + List> binContents = new ArrayList<>(); List binTotals = new ArrayList<>(); sorted.forEach( op -> { @@ -61,7 +62,7 @@ public List pack(List pending) { binContents.get(idx).add(op); binTotals.set(idx, binTotals.get(idx) + c); } else { - List newBin = new ArrayList<>(); + List newBin = new ArrayList<>(); newBin.add(op); binContents.add(newBin); binTotals.add(c); @@ -73,7 +74,7 @@ public List pack(List pending) { .collect(Collectors.toList()); } - private static long cost(TableStats stats) { + private static long cost(TableStatsDto stats) { Long n = stats.getSnapshot().getNumCurrentFiles(); return n != null ? n : 0L; } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java index 0b3b836d3..9b6cb4579 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; import java.util.Map; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; @@ -24,7 +24,8 @@ public static void main(String[] args) { * scoped to one operation type. */ @Bean - public CommandLineRunner run(SchedulerRunner runner, Map binPackers) { + public CommandLineRunner run( + SchedulerRunner runner, Map binPackers) { return args -> binPackers.keySet().forEach(runner::schedule); } } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 124633aa3..27174c07e 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -3,9 +3,9 @@ import com.linkedin.openhouse.optimizer.db.OperationStatus; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -32,14 +32,14 @@ public class SchedulerRunner { private final TableOperationsRepository operationsRepo; private final TableStatsRepository statsRepo; private final JobsServiceClient jobsClient; - private final Map binPackers; + private final Map binPackers; private final String resultsEndpoint; public SchedulerRunner( TableOperationsRepository operationsRepo, TableStatsRepository statsRepo, JobsServiceClient jobsClient, - Map binPackers, + Map binPackers, @Value("${scheduler.results-endpoint}") String resultsEndpoint) { this.operationsRepo = operationsRepo; this.statsRepo = statsRepo; @@ -49,7 +49,7 @@ public SchedulerRunner( } /** Schedule all PENDING operations of the given type across all databases. */ - public void schedule(OperationType operationType) { + public void schedule(OperationTypeDto operationType) { schedule(operationType, Optional.empty(), Optional.empty()); } @@ -59,7 +59,7 @@ public void schedule(OperationType operationType) { */ @Transactional public void schedule( - OperationType operationType, Optional databaseName, Optional tableName) { + OperationTypeDto operationType, Optional databaseName, Optional tableName) { BinPacker packer = binPackers.get(operationType); if (packer == null) { @@ -79,22 +79,22 @@ public void schedule( return; } - List pending = - pendingRows.stream().map(TableOperation::fromRow).collect(Collectors.toList()); + List pending = + pendingRows.stream().map(TableOperationDto::fromRow).collect(Collectors.toList()); // Tradeoff: we fetch fresh table_stats per scheduling cycle (one batched query) rather than - // denormalizing the relevant fields onto TableOperation. The denormalized alternative would - // remove the per-cycle lookup but widen the TableOperation row and serve staler data; the + // denormalizing the relevant fields onto TableOperationDto. The denormalized alternative would + // remove the per-cycle lookup but widen the TableOperationDto row and serve staler data; the // current shape favors smaller operations + freshness over fewer queries. Set uuids = - pending.stream().map(TableOperation::getTableUuid).collect(Collectors.toSet()); - Map statsByUuid = + pending.stream().map(TableOperationDto::getTableUuid).collect(Collectors.toSet()); + Map statsByUuid = statsRepo.findAllById(uuids).stream() - .collect(Collectors.toMap(TableStatsRow::getTableUuid, TableStats::fromRow)); + .collect(Collectors.toMap(TableStatsRow::getTableUuid, TableStatsDto::fromRow)); // Filter at the boundary so SchedulingCandidate.stats is guaranteed non-null. A table without // a stats row gets skipped this cycle and reconsidered after stats land. - List withStats = + List withStats = pending.stream() .filter(op -> statsByUuid.containsKey(op.getTableUuid())) .collect(Collectors.toList()); diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java index 4aadaaabf..d375e6a6f 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulingCandidate.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.scheduler; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import lombok.NonNull; import lombok.Value; @@ -14,6 +14,6 @@ */ @Value public class SchedulingCandidate { - @NonNull TableOperation operation; - @NonNull TableStats stats; + @NonNull TableOperationDto operation; + @NonNull TableStatsDto stats; } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java index 56799f433..5ac86c070 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/config/SchedulerConfig.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.scheduler.config; -import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; import com.linkedin.openhouse.scheduler.BinPacker; import com.linkedin.openhouse.scheduler.FileCountBinPacker; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -33,14 +33,14 @@ public JobsServiceClient jobsServiceClient(WebClient jobsWebClient) { } /** - * Map of {@link OperationType} to the {@link BinPacker} strategy that handles it. Adding a new + * Map of {@link OperationTypeDto} to the {@link BinPacker} strategy that handles it. Adding a new * operation type means adding an entry here and configuring its packer; the strategy class itself * stays generic. */ @Bean - public Map binPackers() { + public Map binPackers() { return Map.of( - OperationType.ORPHAN_FILES_DELETION, - new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin)); + OperationTypeDto.ORPHAN_FILES_DELETION, + new FileCountBinPacker(OperationTypeDto.ORPHAN_FILES_DELETION, ofdMaxFilesPerBin)); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java index 7185494b5..2cda81bd9 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/FileCountBinPackerTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import java.util.List; import java.util.UUID; import java.util.stream.Collectors; @@ -14,21 +14,21 @@ class FileCountBinPackerTest { private static final long MAX = 1_000_000L; private final FileCountBinPacker packer = - new FileCountBinPacker(OperationType.ORPHAN_FILES_DELETION, MAX); + new FileCountBinPacker(OperationTypeDto.ORPHAN_FILES_DELETION, MAX); - private static TableOperation op(String uuid) { - return TableOperation.builder() + private static TableOperationDto op(String uuid) { + return TableOperationDto.builder() .id(UUID.randomUUID().toString()) .tableUuid(uuid) .databaseName("db") .tableName("tbl_" + uuid) - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationTypeDto.ORPHAN_FILES_DELETION) .build(); } - private static TableStats stats(Long fileCount) { - return TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().numCurrentFiles(fileCount).build()) + private static TableStatsDto stats(Long fileCount) { + return TableStatsDto.builder() + .snapshot(TableStatsDto.SnapshotMetrics.builder().numCurrentFiles(fileCount).build()) .build(); } @@ -91,7 +91,7 @@ void sortedDescending_largestFirst() { assertThat(bins).hasSize(1); List ordered = bins.get(0).getOperations().stream() - .map(TableOperation::getTableUuid) + .map(TableOperationDto::getTableUuid) .collect(Collectors.toList()); assertThat(ordered).containsExactly("large", "small"); } @@ -99,6 +99,6 @@ void sortedDescending_largestFirst() { @Test void binCarriesOperationType() { List bins = packer.pack(List.of(candidate("u", 1L))); - assertThat(bins.get(0).getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); + assertThat(bins.get(0).getOperationType()).isEqualTo(OperationTypeDto.ORPHAN_FILES_DELETION); } } diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 7722a6156..24997d900 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -13,7 +13,7 @@ import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; @@ -32,7 +32,7 @@ @ExtendWith(MockitoExtension.class) class SchedulerRunnerTest { - private static final OperationType OFD = OperationType.ORPHAN_FILES_DELETION; + private static final OperationTypeDto OFD = OperationTypeDto.ORPHAN_FILES_DELETION; private static final com.linkedin.openhouse.optimizer.db.OperationType OFD_DB = com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION; private static final com.linkedin.openhouse.optimizer.db.OperationStatus PENDING_DB = From a89a60054c4a96a76fa94ece91c64aee1c970567 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:19:39 -0700 Subject: [PATCH 18/19] fix(scheduler): four runtime gaps surfaced by docker e2e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocated from opt-5's commit 1ec3acf0 to their proper owner (opt-4 introduced these files): 1. FileCountBinPacker.cost() NPE on stats with null snapshot. Analyzer can schedule against a fresh table with no snapshot data; treat that as cost 0 instead of crashing. 2. SchedulerRunner.schedule(OperationType) was non-@Transactional; the 3-arg overload was annotated but self-invocation bypasses the CGLIB proxy, so @Modifying repo calls hit TransactionRequiredException. Annotate the no-arg variant too. 3. scheduler.results-endpoint default URL: stale /v1/table-operations → /v1/optimizer/operations (where the controller actually mounts). Both application.properties and application-test.properties. 4. SchedulerRunnerTest's RESULTS_ENDPOINT constant: same URL fix. Co-Authored-By: Claude Opus 4.7 --- .../com/linkedin/openhouse/scheduler/FileCountBinPacker.java | 3 +++ .../java/com/linkedin/openhouse/scheduler/SchedulerRunner.java | 1 + .../src/main/resources/application.properties | 2 +- .../com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java | 2 +- .../src/test/resources/application-test.properties | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index 23e15f1be..c8c5e83fe 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -75,6 +75,9 @@ public List pack(List pending) { } private static long cost(TableStatsDto stats) { + if (stats == null || stats.getSnapshot() == null) { + return 0L; + } Long n = stats.getSnapshot().getNumCurrentFiles(); return n != null ? n : 0L; } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 27174c07e..883d6ac19 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -49,6 +49,7 @@ public SchedulerRunner( } /** Schedule all PENDING operations of the given type across all databases. */ + @Transactional public void schedule(OperationTypeDto operationType) { schedule(operationType, Optional.empty(), Optional.empty()); } diff --git a/apps/optimizer-scheduler/src/main/resources/application.properties b/apps/optimizer-scheduler/src/main/resources/application.properties index 4d849797c..bdaf4cbe6 100644 --- a/apps/optimizer-scheduler/src/main/resources/application.properties +++ b/apps/optimizer-scheduler/src/main/resources/application.properties @@ -6,5 +6,5 @@ spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} spring.jpa.hibernate.ddl-auto=none jobs.base-uri=${JOBS_BASE_URI:http://localhost:8002} scheduler.ofd.max-files-per-bin=${SCHEDULER_OFD_MAX_FILES_PER_BIN:1000000} -scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/table-operations} +scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/optimizer/operations} scheduler.cluster-id=${SCHEDULER_CLUSTER_ID:LocalHadoopCluster} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 24997d900..1ff885be3 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -38,7 +38,7 @@ class SchedulerRunnerTest { private static final com.linkedin.openhouse.optimizer.db.OperationStatus PENDING_DB = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING; private static final String OFD_STR = OFD.name(); - private static final String RESULTS_ENDPOINT = "http://localhost:8080/v1/table-operations"; + private static final String RESULTS_ENDPOINT = "http://localhost:8080/v1/optimizer/operations"; @Mock private TableOperationsRepository operationsRepo; @Mock private TableStatsRepository statsRepo; diff --git a/apps/optimizer-scheduler/src/test/resources/application-test.properties b/apps/optimizer-scheduler/src/test/resources/application-test.properties index 5904d71ba..35233829b 100644 --- a/apps/optimizer-scheduler/src/test/resources/application-test.properties +++ b/apps/optimizer-scheduler/src/test/resources/application-test.properties @@ -6,5 +6,5 @@ spring.sql.init.mode=always spring.sql.init.schema-locations=classpath:schema.sql jobs.base-uri=http://localhost:9999 scheduler.ofd.max-files-per-bin=1000000 -scheduler.results-endpoint=http://localhost:8080/v1/table-operations +scheduler.results-endpoint=http://localhost:8080/v1/optimizer/operations scheduler.cluster-id=test-cluster From 66aa3e7c3cd01182cfcf0728db9b507a46ae4b72 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:28:26 -0700 Subject: [PATCH 19/19] refactor(scheduler): switch to Optional repo API + dedup per cycle SchedulerRunner now uses the unified find(...) / updateBatch(...) / cancel(...) shape. The CAS calls become updateBatch(ids, fromStatus, toStatus, Optional, Optional) for each transition. findClaimedIds is replaced by a find(...) call keyed on (SCHEDULING, scheduledAt watermark, claimed-ids) that returns rows; caller extracts ids. Dedup move: cancelDuplicatePendingBatch was per-bin AND blind (NOT IN keepIds); two bins in the same cycle nuked each other's rows. The new shape is per-cycle, computed by the runner: group pendingRows by tableUuid, keep the oldest per group (lex tiebreak on id), and call repo.cancel(duplicateIds) once before bin packing. optimizer.repo.default-limit threads through via @Value; field is initialised inline so pure-Mockito tests see a sane value. Co-Authored-By: Claude Opus 4.7 --- .../openhouse/scheduler/SchedulerRunner.java | 104 +++++++-- .../src/main/resources/application.properties | 1 + .../scheduler/SchedulerRunnerTest.java | 219 ++++++++++++------ 3 files changed, 245 insertions(+), 79 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 883d6ac19..25cee0f46 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -10,6 +10,7 @@ import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; import java.time.Instant; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -17,6 +18,7 @@ import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; +import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; @@ -35,6 +37,9 @@ public class SchedulerRunner { private final Map binPackers; private final String resultsEndpoint; + @Value("${optimizer.repo.default-limit:10000}") + private int defaultLimit = 10_000; + public SchedulerRunner( TableOperationsRepository operationsRepo, TableStatsRepository statsRepo, @@ -68,20 +73,32 @@ public void schedule( "No BinPacker registered for operation type " + operationType); } + PageRequest page = PageRequest.of(0, defaultLimit); List pendingRows = operationsRepo.find( - operationType.toDb(), - OperationStatus.PENDING, - null, - databaseName.orElse(null), - tableName.orElse(null)); + Optional.of(operationType.toDb()), + Optional.of(OperationStatus.PENDING), + Optional.empty(), + databaseName, + tableName, + Optional.empty(), + Optional.empty(), + page); if (pendingRows.isEmpty()) { log.info("No PENDING operations of type {}; nothing to schedule", operationType); return; } + // Deduplicate before claiming: if multiple PENDING rows exist for the same tableUuid, keep + // the oldest (lex-tiebreak on id) and cancel the rest. Per-cycle, not per-bin — running this + // inside the bin loop nuked rows belonging to other bins of the same cycle. + List survivors = cancelDuplicates(pendingRows); + if (survivors.isEmpty()) { + return; + } + List pending = - pendingRows.stream().map(TableOperationDto::fromRow).collect(Collectors.toList()); + survivors.stream().map(TableOperationDto::fromRow).collect(Collectors.toList()); // Tradeoff: we fetch fresh table_stats per scheduling cycle (one batched query) rather than // denormalizing the relevant fields onto TableOperationDto. The denormalized alternative would @@ -124,19 +141,68 @@ public void schedule( bins.forEach(this::submitBin); } + /** + * Group {@code pendingRows} by {@code tableUuid}; for any group with more than one row, cancel + * all but the oldest (lex-tiebreak on id). Returns the survivors in input order. Deterministic. + */ + private List cancelDuplicates(List pendingRows) { + Map> byTableUuid = + pendingRows.stream().collect(Collectors.groupingBy(TableOperationsRow::getTableUuid)); + + List duplicateIds = + byTableUuid.values().stream() + .filter(rows -> rows.size() > 1) + .flatMap( + rows -> + rows.stream() + .sorted( + Comparator.comparing(TableOperationsRow::getCreatedAt) + .thenComparing(TableOperationsRow::getId)) + .skip(1)) + .map(TableOperationsRow::getId) + .collect(Collectors.toList()); + + if (duplicateIds.isEmpty()) { + return pendingRows; + } + + int cancelled = operationsRepo.cancel(duplicateIds); + log.warn("Cancelled {} duplicate PENDING rows", cancelled); + + Set cancelledIds = Set.copyOf(duplicateIds); + return pendingRows.stream() + .filter(r -> !cancelledIds.contains(r.getId())) + .collect(Collectors.toList()); + } + private void submitBin(Bin bin) { List ids = bin.getOperationIds(); - // Deduplicate PENDING rows per tableUuid for this op type, keeping the IDs in this bin. - operationsRepo.cancelDuplicatePendingBatch(bin.getOperationType().toDb(), ids); - // Claim the rows in one batched UPDATE: PENDING → SCHEDULING. The UPDATE's row count is just // an aggregate — to know *which* rows we own, re-query for SCHEDULING rows tagged with our // scheduledAt watermark. Anything not in that subset belongs to another instance or was // canceled, and must not be submitted or marked SCHEDULED. Instant claimedAt = Instant.now(); - operationsRepo.markSchedulingBatch(ids, claimedAt); - List claimedIds = operationsRepo.findClaimedIds(ids, claimedAt); + operationsRepo.updateBatch( + ids, + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(claimedAt), + Optional.empty()); + List claimedIds = + operationsRepo + .find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of(claimedAt), + Optional.of(ids), + PageRequest.of(0, defaultLimit)) + .stream() + .map(TableOperationsRow::getId) + .collect(Collectors.toList()); if (claimedIds.isEmpty()) { log.info("All rows in bin already claimed by another scheduler instance; skipping"); return; @@ -151,14 +217,26 @@ private void submitBin(Bin bin) { Bin claimedBin = bin.subset(claimedIds); Optional jobId = claimedBin.schedule(jobsClient, resultsEndpoint); if (jobId.isPresent()) { - int updated = operationsRepo.markScheduledBatch(claimedIds, jobId.get()); + int updated = + operationsRepo.updateBatch( + claimedIds, + OperationStatus.SCHEDULING, + OperationStatus.SCHEDULED, + Optional.empty(), + Optional.of(jobId.get())); log.info( "Submitted job {} for {} tables ({} rows marked SCHEDULED)", jobId.get(), claimedBin.getOperations().size(), updated); } else { - int reverted = operationsRepo.markPendingBatch(claimedIds); + int reverted = + operationsRepo.updateBatch( + claimedIds, + OperationStatus.SCHEDULING, + OperationStatus.PENDING, + Optional.empty(), + Optional.empty()); log.warn( "Job submission failed; reverted {} claimed rows back to PENDING for retry on the next" + " pass", diff --git a/apps/optimizer-scheduler/src/main/resources/application.properties b/apps/optimizer-scheduler/src/main/resources/application.properties index bdaf4cbe6..442cd98a4 100644 --- a/apps/optimizer-scheduler/src/main/resources/application.properties +++ b/apps/optimizer-scheduler/src/main/resources/application.properties @@ -8,3 +8,4 @@ jobs.base-uri=${JOBS_BASE_URI:http://localhost:8002} scheduler.ofd.max-files-per-bin=${SCHEDULER_OFD_MAX_FILES_PER_BIN:1000000} scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/optimizer/operations} scheduler.cluster-id=${SCHEDULER_CLUSTER_ID:LocalHadoopCluster} +optimizer.repo.default-limit=${OPTIMIZER_REPO_DEFAULT_LIMIT:10000} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 1ff885be3..71e91eb39 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -10,6 +10,7 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.linkedin.openhouse.optimizer.db.OperationStatus; import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; @@ -17,6 +18,7 @@ import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import com.linkedin.openhouse.scheduler.client.JobsServiceClient; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; @@ -35,8 +37,6 @@ class SchedulerRunnerTest { private static final OperationTypeDto OFD = OperationTypeDto.ORPHAN_FILES_DELETION; private static final com.linkedin.openhouse.optimizer.db.OperationType OFD_DB = com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION; - private static final com.linkedin.openhouse.optimizer.db.OperationStatus PENDING_DB = - com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING; private static final String OFD_STR = OFD.name(); private static final String RESULTS_ENDPOINT = "http://localhost:8080/v1/optimizer/operations"; @@ -54,6 +54,49 @@ void setUp() { operationsRepo, statsRepo, jobsClient, Map.of(OFD, binPacker), RESULTS_ENDPOINT); } + // ---- Stubbing helpers ---- + + /** Stubs the initial "find PENDING" call. */ + private void stubFindPending(List rows) { + when(operationsRepo.find( + eq(Optional.of(OFD_DB)), + eq(Optional.of(OperationStatus.PENDING)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + any())) + .thenReturn(rows); + } + + /** Stubs the post-claim "find SCHEDULING" call. */ + private void stubFindClaimed(List rows) { + when(operationsRepo.find( + eq(Optional.empty()), + eq(Optional.of(OperationStatus.SCHEDULING)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + any(), + any(), + any())) + .thenReturn(rows); + } + + /** Stubs the bin packer to return one bin containing every candidate. */ + private void stubOneBinForAllCandidates() { + when(binPacker.pack(anyList())) + .thenAnswer( + inv -> + List.of( + new Bin( + OFD, + inv.>getArgument(0).stream() + .map(SchedulingCandidate::getOperation) + .collect(Collectors.toList())))); + } + private TableOperationsRow pendingRow(String uuid, String db, String table) { return TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -61,11 +104,15 @@ private TableOperationsRow pendingRow(String uuid, String db, String table) { .databaseName(db) .tableName(table) .operationType(OFD_DB) - .status(PENDING_DB) - .createdAt(java.time.Instant.now()) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) .build(); } + private TableOperationsRow schedulingRow(TableOperationsRow source) { + return source.toBuilder().status(OperationStatus.SCHEDULING).build(); + } + private TableStatsRow statsRow(String uuid, long numCurrentFiles) { return TableStatsRow.builder() .tableUuid(uuid) @@ -73,22 +120,11 @@ private TableStatsRow statsRow(String uuid, long numCurrentFiles) { .build(); } - /** Stubs the bin packer to return one bin containing every candidate. */ - private void stubOneBinForAllCandidates() { - when(binPacker.pack(anyList())) - .thenAnswer( - inv -> - List.of( - new Bin( - OFD, - inv.>getArgument(0).stream() - .map(SchedulingCandidate::getOperation) - .collect(Collectors.toList())))); - } + // ---- Tests ---- @Test void schedule_noPendingOps_noJobSubmitted() { - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of()); + stubFindPending(List.of()); runner.schedule(OFD); @@ -105,7 +141,7 @@ void schedule_unknownOperationType_throws() { .isInstanceOf(IllegalStateException.class) .hasMessageContaining("No BinPacker registered"); - verify(operationsRepo, never()).find(any(), any(), any(), any(), any()); + verify(operationsRepo, never()).find(any(), any(), any(), any(), any(), any(), any(), any()); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); } @@ -114,23 +150,31 @@ void schedule_singleBin_claimsAndMarksScheduled() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); + stubFindPending(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100_000L))); stubOneBinForAllCandidates(); - when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); - when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(row.getId())); - when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.PENDING), eq(OperationStatus.SCHEDULING), any(), any())) + .thenReturn(1); + stubFindClaimed(List.of(schedulingRow(row))); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.SCHEDULED), any(), any())) + .thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-123")); runner.schedule(OFD); - ArgumentCaptor> ids = ArgumentCaptor.forClass(List.class); - verify(operationsRepo).markSchedulingBatch(ids.capture(), any()); - assertThat(ids.getValue()).containsExactly(row.getId()); - - verify(operationsRepo).markScheduledBatch(eq(List.of(row.getId())), eq("job-123")); - verify(operationsRepo, never()).markPendingBatch(anyList()); + verify(operationsRepo) + .updateBatch( + eq(List.of(row.getId())), + eq(OperationStatus.SCHEDULING), + eq(OperationStatus.SCHEDULED), + eq(Optional.empty()), + eq(Optional.of("job-123"))); + verify(operationsRepo, never()) + .updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.PENDING), any(), any()); ArgumentCaptor> tableNames = ArgumentCaptor.forClass(List.class); verify(jobsClient) @@ -143,20 +187,31 @@ void schedule_jobLaunchFails_marksPendingForRetry() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); + stubFindPending(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); stubOneBinForAllCandidates(); - when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); - when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(row.getId())); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.PENDING), eq(OperationStatus.SCHEDULING), any(), any())) + .thenReturn(1); + stubFindClaimed(List.of(schedulingRow(row))); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.empty()); - when(operationsRepo.markPendingBatch(anyList())).thenReturn(1); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.PENDING), any(), any())) + .thenReturn(1); runner.schedule(OFD); - verify(operationsRepo).markSchedulingBatch(eq(List.of(row.getId())), any()); - verify(operationsRepo).markPendingBatch(eq(List.of(row.getId()))); - verify(operationsRepo, never()).markScheduledBatch(anyList(), anyString()); + verify(operationsRepo) + .updateBatch( + eq(List.of(row.getId())), + eq(OperationStatus.SCHEDULING), + eq(OperationStatus.PENDING), + eq(Optional.empty()), + eq(Optional.empty())); + verify(operationsRepo, never()) + .updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.SCHEDULED), any(), any()); } @Test @@ -164,38 +219,56 @@ void schedule_rowsAlreadyClaimed_skipsSubmit() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); + stubFindPending(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); stubOneBinForAllCandidates(); - when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(0); - when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of()); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.PENDING), eq(OperationStatus.SCHEDULING), any(), any())) + .thenReturn(0); + stubFindClaimed(List.of()); runner.schedule(OFD); verify(jobsClient, never()).launch(anyString(), anyString(), anyList(), anyList(), anyString()); - verify(operationsRepo, never()).markScheduledBatch(anyList(), anyString()); - verify(operationsRepo, never()).markPendingBatch(anyList()); + verify(operationsRepo, never()) + .updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.SCHEDULED), any(), any()); + verify(operationsRepo, never()) + .updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.PENDING), any(), any()); } @Test - void schedule_cancelsDuplicatePendingBeforeClaim() { + void schedule_cancelsDuplicatePendingPerCycle() { String uuid = UUID.randomUUID().toString(); TableOperationsRow row1 = pendingRow(uuid, "db1", "tbl1"); TableOperationsRow row2 = pendingRow(uuid, "db1", "tbl1"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row1, row2)); + stubFindPending(List.of(row1, row2)); + when(operationsRepo.cancel(anyList())).thenReturn(1); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(uuid, 100L))); stubOneBinForAllCandidates(); - when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(2); - when(operationsRepo.findClaimedIds(anyList(), any())) - .thenReturn(List.of(row1.getId(), row2.getId())); - when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(2); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.PENDING), eq(OperationStatus.SCHEDULING), any(), any())) + .thenReturn(1); + // After dedup, only row1 (oldest by createdAt then id) survives. + TableOperationsRow survivor = row1.getCreatedAt().isBefore(row2.getCreatedAt()) ? row1 : row2; + if (row1.getCreatedAt().equals(row2.getCreatedAt())) { + survivor = row1.getId().compareTo(row2.getId()) <= 0 ? row1 : row2; + } + stubFindClaimed(List.of(schedulingRow(survivor))); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.SCHEDULED), any(), any())) + .thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) - .thenReturn(Optional.of("job-789")); + .thenReturn(Optional.of("job-dedup")); runner.schedule(OFD); - verify(operationsRepo).cancelDuplicatePendingBatch(eq(OFD_DB), anyList()); + // Exactly one ID was cancelled (the duplicate). + ArgumentCaptor> cancelled = ArgumentCaptor.forClass(List.class); + verify(operationsRepo).cancel(cancelled.capture()); + assertThat(cancelled.getValue()).hasSize(1); } @Test @@ -205,20 +278,23 @@ void schedule_partialClaim_launchesAndMarksOnlyClaimedSubset() { TableOperationsRow rowA = pendingRow(uuidA, "db1", "tblA"); TableOperationsRow rowB = pendingRow(uuidB, "db1", "tblB"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(rowA, rowB)); + stubFindPending(List.of(rowA, rowB)); when(statsRepo.findAllById(any())) .thenReturn(List.of(statsRow(uuidA, 100L), statsRow(uuidB, 100L))); stubOneBinForAllCandidates(); - // We submitted ids [A, B] to mark; only A was actually claimed (B owned by another instance). - when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); - when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(rowA.getId())); - when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.PENDING), eq(OperationStatus.SCHEDULING), any(), any())) + .thenReturn(1); + // Only A actually claimed (B owned by another instance). + stubFindClaimed(List.of(schedulingRow(rowA))); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.SCHEDULED), any(), any())) + .thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-partial")); runner.schedule(OFD); - // Job is launched for the claimed subset only. ArgumentCaptor> launchedTableNames = ArgumentCaptor.forClass(List.class); ArgumentCaptor> launchedOpIds = ArgumentCaptor.forClass(List.class); verify(jobsClient) @@ -231,10 +307,13 @@ void schedule_partialClaim_launchesAndMarksOnlyClaimedSubset() { assertThat(launchedTableNames.getValue()).containsExactly("db1.tblA"); assertThat(launchedOpIds.getValue()).containsExactly(rowA.getId()); - // markScheduledBatch is called only with the claimed id, not the unclaimed one. - verify(operationsRepo).markScheduledBatch(eq(List.of(rowA.getId())), eq("job-partial")); - verify(operationsRepo, never()).markScheduledBatch(eq(List.of(rowB.getId())), anyString()); - verify(operationsRepo, never()).markPendingBatch(anyList()); + verify(operationsRepo) + .updateBatch( + eq(List.of(rowA.getId())), + eq(OperationStatus.SCHEDULING), + eq(OperationStatus.SCHEDULED), + eq(Optional.empty()), + eq(Optional.of("job-partial"))); } @Test @@ -244,21 +323,29 @@ void schedule_opsWithoutStats_skipped() { TableOperationsRow withStatsRow = pendingRow(withStats, "db1", "tblA"); TableOperationsRow missingRow = pendingRow(missing, "db1", "tblB"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)) - .thenReturn(List.of(withStatsRow, missingRow)); + stubFindPending(List.of(withStatsRow, missingRow)); when(statsRepo.findAllById(any())).thenReturn(List.of(statsRow(withStats, 50L))); stubOneBinForAllCandidates(); - when(operationsRepo.markSchedulingBatch(anyList(), any())).thenReturn(1); - when(operationsRepo.findClaimedIds(anyList(), any())).thenReturn(List.of(withStatsRow.getId())); - when(operationsRepo.markScheduledBatch(anyList(), anyString())).thenReturn(1); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.PENDING), eq(OperationStatus.SCHEDULING), any(), any())) + .thenReturn(1); + stubFindClaimed(List.of(schedulingRow(withStatsRow))); + when(operationsRepo.updateBatch( + anyList(), eq(OperationStatus.SCHEDULING), eq(OperationStatus.SCHEDULED), any(), any())) + .thenReturn(1); when(jobsClient.launch(anyString(), anyString(), anyList(), anyList(), anyString())) .thenReturn(Optional.of("job-skip")); runner.schedule(OFD); - // Only the op with a stats row makes it into the claim batch. ArgumentCaptor> ids = ArgumentCaptor.forClass(List.class); - verify(operationsRepo).markSchedulingBatch(ids.capture(), any()); + verify(operationsRepo) + .updateBatch( + ids.capture(), + eq(OperationStatus.PENDING), + eq(OperationStatus.SCHEDULING), + any(), + any()); assertThat(ids.getValue()).containsExactly(withStatsRow.getId()); } @@ -266,7 +353,7 @@ void schedule_opsWithoutStats_skipped() { void schedule_allOpsWithoutStats_noJobSubmitted() { TableOperationsRow row = pendingRow(UUID.randomUUID().toString(), "db1", "tbl1"); - when(operationsRepo.find(OFD_DB, PENDING_DB, null, null, null)).thenReturn(List.of(row)); + stubFindPending(List.of(row)); when(statsRepo.findAllById(any())).thenReturn(List.of()); runner.schedule(OFD);