diff --git a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java index 8cd6fb7ed5aa6..a02f96cca4996 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java @@ -852,7 +852,7 @@ public void testNoOpEngineFactoryTakesPrecedence() { final IndexService indexService = indicesService.indexServiceSafe(indexMetadata.getIndex()); for (IndexShard indexShard : indexService) { - assertThat(indexShard.getEngine(), instanceOf(NoOpEngine.class)); + assertThat(indexShard.getIndexer(), instanceOf(NoOpEngine.class)); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java index f45a276be1033..35a6b4649cb22 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -103,7 +103,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY; -import static org.opensearch.index.shard.IndexShardTests.getEngineFromShard; +import static org.opensearch.index.shard.IndexShardTestCase.getEngine; import static org.opensearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAllSuccessful; @@ -2052,7 +2052,7 @@ public void testSnapshottingWithMissingSequenceNumbers() { final Index index = resolveIndex(indexName); final IndexShard primary = internalCluster().getInstance(IndicesService.class, dataNode).getShardOrNull(new ShardId(index, 0)); // create a gap in the sequence numbers - EngineTestCase.generateNewSeqNo(getEngineFromShard(primary)); + EngineTestCase.generateNewSeqNo(getEngine(primary)); for (int i = 5; i < 10; i++) { index(indexName, "_doc", Integer.toString(i), "foo", "bar" + i); diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index 3ec1788a70309..6cdef2f4cfd87 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -2234,5 +2234,4 @@ public long getMaxSeenAutoIdTimestamp() { * to advance this marker to at least the given sequence number. */ public abstract void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary); - } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineBackedIndexer.java b/server/src/main/java/org/opensearch/index/engine/EngineBackedIndexer.java new file mode 100644 index 0000000000000..173764967a814 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/EngineBackedIndexer.java @@ -0,0 +1,413 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.SegmentInfos; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.index.VersionType; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.Indexer; +import org.opensearch.index.engine.exec.SegmentInfosCatalogSnapshot; +import org.opensearch.index.mapper.DocumentMapperForType; +import org.opensearch.index.mapper.SourceToParse; +import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.seqno.SeqNoStats; +import org.opensearch.index.shard.DocsStats; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogManager; +import org.opensearch.indices.pollingingest.PollingIngestStats; +import org.opensearch.search.suggest.completion.CompletionStats; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * An indexer implementation that uses an engine to perform indexing operations. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class EngineBackedIndexer implements Indexer { + + private final Engine engine; + + public EngineBackedIndexer(Engine engine) { + this.engine = engine; + } + + @Override + public EngineConfig config() { + return engine.config(); + } + + @Override + public Engine.IndexResult index(Engine.Index index) throws IOException { + return engine.index(index); + } + + @Override + public Engine.DeleteResult delete(Engine.Delete delete) throws IOException { + return engine.delete(delete); + } + + @Override + public Engine.NoOpResult noOp(Engine.NoOp noOp) throws IOException { + return engine.noOp(noOp); + } + + @Override + public int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException { + return engine.countNumberOfHistoryOperations(source, fromSeqNo, toSeqNumber); + } + + @Override + public boolean hasCompleteOperationHistory(String reason, long startingSeqNo) { + return engine.hasCompleteOperationHistory(reason, startingSeqNo); + } + + @Override + public long getIndexBufferRAMBytesUsed() { + return engine.getIndexBufferRAMBytesUsed(); + } + + @Override + public List segments(boolean verbose) { + return engine.segments(verbose); + } + + @Override + public long getMaxSeenAutoIdTimestamp() { + return engine.getMaxSeenAutoIdTimestamp(); + } + + @Override + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) { + engine.updateMaxUnsafeAutoIdTimestamp(newTimestamp); + } + + @Override + public long getMaxSeqNoOfUpdatesOrDeletes() { + return engine.getMaxSeqNoOfUpdatesOrDeletes(); + } + + @Override + public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) { + engine.advanceMaxSeqNoOfUpdatesOrDeletes(maxSeqNoOfUpdatesOnPrimary); + } + + @Override + public long getLastWriteNanos() { + return engine.getLastWriteNanos(); + } + + @Override + public int fillSeqNoGaps(long primaryTerm) throws IOException { + return engine.fillSeqNoGaps(primaryTerm); + } + + @Override + public void forceMerge( + boolean flush, + int maxNumSegments, + boolean onlyExpungeDeletes, + boolean upgrade, + boolean upgradeOnlyAncientSegments, + String forceMergeUUID + ) throws 
EngineException, IOException { + engine.forceMerge(flush, maxNumSegments, onlyExpungeDeletes, upgrade, upgradeOnlyAncientSegments, forceMergeUUID); + } + + @Override + public void onSettingsChanged(TimeValue translogRetentionAge, ByteSizeValue translogRetentionSize, long softDeletesRetentionOps) { + engine.onSettingsChanged(translogRetentionAge, translogRetentionSize, softDeletesRetentionOps); + } + + @Override + public void writeIndexingBuffer() throws EngineException { + engine.writeIndexingBuffer(); + } + + @Override + public void refresh(String source) throws EngineException { + engine.refresh(source); + } + + @Override + public void flush(boolean force, boolean waitIfOngoing) throws EngineException { + engine.flush(force, waitIfOngoing); + } + + @Override + public void flush() { + engine.flush(); + } + + @Override + public boolean shouldPeriodicallyFlush() { + return engine.shouldPeriodicallyFlush(); + } + + @Override + public SafeCommitInfo getSafeCommitInfo() { + return engine.getSafeCommitInfo(); + } + + @Override + public TranslogManager translogManager() { + return engine.translogManager(); + } + + @Override + public Closeable acquireHistoryRetentionLock() { + return engine.acquireHistoryRetentionLock(); + } + + @Override + public Translog.Snapshot newChangesSnapshot( + String source, + long fromSeqNo, + long toSeqNo, + boolean requiredFullRange, + boolean accurateCount + ) throws IOException { + return engine.newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount); + } + + @Override + public String getHistoryUUID() { + return engine.getHistoryUUID(); + } + + @Override + public void flushAndClose() throws IOException { + engine.flushAndClose(); + } + + @Override + public void failEngine(String reason, Exception failure) { + engine.failEngine(reason, failure); + } + + @Override + public Engine.Index prepareIndex( + DocumentMapperForType docMapper, + SourceToParse source, + long seqNo, + long primaryTerm, + long version, + VersionType versionType, + Engine.Operation.Origin origin, + long autoGeneratedIdTimestamp, + boolean isRetry, + long ifSeqNo, + long ifPrimaryTerm + ) { + return engine.prepareIndex( + docMapper, + source, + seqNo, + primaryTerm, + version, + versionType, + origin, + autoGeneratedIdTimestamp, + isRetry, + ifSeqNo, + ifPrimaryTerm + ); + } + + @Override + public Engine.Delete prepareDelete( + String id, + long seqNo, + long primaryTerm, + long version, + VersionType versionType, + Engine.Operation.Origin origin, + long ifSeqNo, + long ifPrimaryTerm + ) { + return engine.prepareDelete(id, seqNo, primaryTerm, version, versionType, origin, ifSeqNo, ifPrimaryTerm); + } + + @Override + public GatedCloseable acquireSafeIndexCommit() throws EngineException { + return engine.acquireSafeIndexCommit(); + } + + @Override + public long getPersistedLocalCheckpoint() { + return engine.getPersistedLocalCheckpoint(); + } + + @Override + public long getProcessedLocalCheckpoint() { + return engine.getProcessedLocalCheckpoint(); + } + + @Override + public SeqNoStats getSeqNoStats(long globalCheckpoint) { + return engine.getSeqNoStats(globalCheckpoint); + } + + @Override + public long getLastSyncedGlobalCheckpoint() { + return engine.getLastSyncedGlobalCheckpoint(); + } + + @Override + public long getMinRetainedSeqNo() { + return engine.getMinRetainedSeqNo(); + } + + @Override + public CommitStats commitStats() { + return engine.commitStats(); + } + + @Override + public DocsStats docStats() { + return engine.docStats(); + } + + @Override + public 
SegmentsStats segmentsStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments) { + return engine.segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); + } + + @Override + public CompletionStats completionStats(String... fieldNamePatterns) { + return engine.completionStats(fieldNamePatterns); + } + + @Override + public PollingIngestStats pollingIngestStats() { + return engine.pollingIngestStats(); + } + + @Override + public MergeStats getMergeStats() { + return engine.getMergeStats(); + } + + @Override + public long getIndexThrottleTimeInMillis() { + return engine.getIndexThrottleTimeInMillis(); + } + + @Override + public boolean isThrottled() { + return engine.isThrottled(); + } + + @Override + public void activateThrottling() { + engine.activateThrottling(); + } + + @Override + public void deactivateThrottling() { + engine.deactivateThrottling(); + } + + @Override + public long getWritingBytes() { + return engine.getWritingBytes(); + } + + @Override + public long unreferencedFileCleanUpsPerformed() { + return engine.unreferencedFileCleanUpsPerformed(); + } + + @Override + public boolean refreshNeeded() { + return engine.refreshNeeded(); + } + + @Override + public void verifyEngineBeforeIndexClosing() throws IllegalStateException { + engine.verifyEngineBeforeIndexClosing(); + } + + @Override + public void maybePruneDeletes() { + engine.maybePruneDeletes(); + } + + @Override + public boolean maybeRefresh(String source) { + return engine.maybeRefresh(source); + } + + @Override + public void close() throws IOException { + engine.close(); + } + + @Override + public void ensureOpen() { + engine.ensureOpen(); + } + + @Override + public long lastRefreshedCheckpoint() { + return Indexer.super.lastRefreshedCheckpoint(); + } + + @Override + public long currentOngoingRefreshCheckpoint() { + return Indexer.super.currentOngoingRefreshCheckpoint(); + } + + @Override + public void finalizeReplication(CatalogSnapshot catalogSnapshot, ShardPath shardPath) throws IOException { + Indexer.super.finalizeReplication(catalogSnapshot, shardPath); + } + + @Override + public void maybeDie(Logger logger, String maybeMessage, Throwable maybeFatal) { + Indexer.super.maybeDie(logger, maybeMessage, maybeFatal); + } + + @Override + public long getNativeBytesUsed() { + return Indexer.super.getNativeBytesUsed(); + } + + @Override + public String loadHistoryUUID(Map commitData) { + return Indexer.super.loadHistoryUUID(commitData); + } + + /** + * Returns a snapshot of the catalog of segments in this engine. This snapshot is + * guaranteed to be consistent and can be used for recovery purposes. + */ + @ExperimentalApi + @Override + public GatedCloseable acquireSnapshot() { + GatedCloseable segmentInfos = engine.getSegmentInfosSnapshot(); + SegmentInfosCatalogSnapshot catalogSnapshot = new SegmentInfosCatalogSnapshot(segmentInfos.get()); + return new GatedCloseable<>(catalogSnapshot, segmentInfos::close); + } + + public Engine getEngine() { + return engine; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormat.java b/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormat.java new file mode 100644 index 0000000000000..ffea0cdf69668 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormat.java @@ -0,0 +1,137 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Objects; +import java.util.Set; + +/** + * Represents a data format for storing and managing index data, with declared capabilities. + * Each data format (e.g., Lucene, Parquet) declares what storage and query capabilities it supports. + * Equality is based on the format name — there should be one DataFormat instance per unique name. + * + * @opensearch.experimental + */ +@ExperimentalApi +public final class DataFormat { + + /** Well-known Lucene data format sentinel — used as the universal fallback. */ + public static final DataFormat LUCENE = new DataFormat("lucene", Set.of(), 0); + + /** Well-known metadata format sentinel — used for segment metadata files. */ + public static final DataFormat METADATA = new DataFormat("metadata", Set.of(), -1); + + /** + * Returns a well-known DataFormat for the given name, or creates a bare sentinel instance. + * This is intended for deserialization paths where only the format name is available. + * Well-known names ("lucene", "metadata") return the canonical constant instances. + * + * @param name the data format name + * @return the corresponding DataFormat instance + */ + public static DataFormat of(String name) { + if (LUCENE.name.equals(name)) return LUCENE; + if (METADATA.name.equals(name)) return METADATA; + return new DataFormat(name, Set.of(), 0); + } + + /** + * Capabilities that a data format can support. + */ + @ExperimentalApi + public enum Capability { + /** Inverted index based full-text search (BM25, phrase queries) */ + FULL_TEXT_SEARCH, + /** Column-oriented storage optimized for aggregations and analytics */ + COLUMNAR_STORAGE, + /** Vector similarity search (kNN, ANN) */ + VECTOR_SEARCH, + /** Numeric and date range queries via point trees */ + POINT_RANGE, + /** Column-stride field data for sorting and scripting */ + DOC_VALUES, + /** Original field value retrieval */ + STORED_FIELDS + } + + private final String name; + private final Set capabilities; + private final int priority; + + /** + * Constructs a DataFormat with the given name, capabilities, and priority. + * + * @param name unique identifier for this format (e.g., "lucene", "parquet") + * @param capabilities the set of capabilities this format supports + * @param priority routing priority — higher values are preferred when multiple formats support a field + */ + public DataFormat(String name, Set capabilities, int priority) { + this.name = Objects.requireNonNull(name, "name must not be null"); + this.capabilities = Set.copyOf(capabilities); + this.priority = priority; + } + + /** + * Returns the unique name of this data format. + * + * @return the format name + */ + public String name() { + return name; + } + + /** + * Checks if this format supports the given capability. + * + * @param capability the capability to check + * @return true if supported + */ + public boolean supports(Capability capability) { + return capabilities.contains(capability); + } + + /** + * Returns all capabilities supported by this format. + * + * @return unmodifiable set of capabilities + */ + public Set capabilities() { + return capabilities; + } + + /** + * Returns the routing priority. Higher values are preferred when multiple formats support a field. + * Lucene should use 0 as the universal fallback. 
+ * + * @return the priority value + */ + public int priority() { + return priority; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DataFormat that = (DataFormat) o; + return name.equals(that.name); + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + @Override + public String toString() { + return "DataFormat{name='" + name + "', capabilities=" + capabilities + ", priority=" + priority + "}"; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormatPlugin.java b/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormatPlugin.java new file mode 100644 index 0000000000000..1e23f09a58a28 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormatPlugin.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +/** + * Plugin interface for providing custom data format implementations. + * Plugins implement this to register their data format (e.g., Parquet, Lucene) + * with the {@link DataFormatRegistry} during node bootstrap. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface DataFormatPlugin { + + /** + * Returns the data format with its declared capabilities and priority. + * + * @return the data format descriptor + */ + DataFormat getDataFormat(); + + /** + * Checks if this data format can handle the given field type. + * Used by the registry to route fields to the appropriate format during indexing. + * + * @param fieldType the mapped field type to check + * @return true if this format supports the field type + */ + boolean supportsField(MappedFieldType fieldType); +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormatRegistry.java b/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormatRegistry.java new file mode 100644 index 0000000000000..a2781582d085e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/DataFormatRegistry.java @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; + +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Registry for data format plugins, built once during node bootstrap. + * Provides lookup by name and field-to-format routing based on declared capabilities and priority. + *

+ * This follows the same immutable registry pattern as {@link org.opensearch.indices.mapper.MapperRegistry}. + * Plugins register their {@link DataFormatPlugin} implementations via + * {@link org.opensearch.plugins.PluginsService}, and the registry is constructed during node startup. + * + * @opensearch.experimental + */ +@ExperimentalApi +public final class DataFormatRegistry { + + private final Map formats; + private final List priorityOrder; + + /** + * Constructs the registry from a list of data format plugins. + * Plugins are validated for uniqueness and sorted by priority (highest first) for field routing. + * + * @param plugins the list of data format plugins to register + * @throws IllegalArgumentException if duplicate format names are detected + */ + public DataFormatRegistry(List plugins) { + Map map = new LinkedHashMap<>(); + for (DataFormatPlugin plugin : plugins) { + String name = plugin.getDataFormat().name(); + if (map.containsKey(name)) { + throw new IllegalArgumentException("Duplicate data format registered: [" + name + "]"); + } + map.put(name, plugin); + } + this.formats = Collections.unmodifiableMap(map); + this.priorityOrder = plugins.stream() + .sorted(Comparator.comparingInt((DataFormatPlugin p) -> p.getDataFormat().priority()).reversed()) + .toList(); + } + + /** + * Looks up a plugin by format name. Used during deserialization of Segment/FileMetadata. + * + * @param name the data format name + * @return the plugin, or null if not registered + */ + public DataFormatPlugin getPlugin(String name) { + return formats.get(name); + } + + /** + * Looks up a DataFormat by name. + * + * @param name the data format name + * @return the DataFormat, or null if not registered + */ + public DataFormat getFormat(String name) { + DataFormatPlugin plugin = formats.get(name); + return plugin != null ? plugin.getDataFormat() : null; + } + + /** + * Returns all registered plugins. + * + * @return unmodifiable collection of plugins + */ + public Collection getPlugins() { + return formats.values(); + } + + /** + * Returns all registered format names. + * + * @return unmodifiable collection of format names + */ + public Collection getFormatNames() { + return formats.keySet(); + } + + /** + * Routes a field to the best data format based on priority and capability. + * The highest-priority plugin that supports the field wins. + * + * @param fieldType the mapped field type + * @return the best matching DataFormat + * @throws IllegalStateException if no format supports the field + */ + public DataFormat routeField(MappedFieldType fieldType) { + for (DataFormatPlugin plugin : priorityOrder) { + if (plugin.supportsField(fieldType)) { + return plugin.getDataFormat(); + } + } + throw new IllegalStateException("No data format supports field: [" + fieldType.name() + "] of type [" + fieldType.typeName() + "]"); + } + + /** + * Builds a complete field-to-format routing table for an index mapping. + * Called once per mapping change and cached on the IndexShard. 
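To make the routing contract concrete, here is a minimal sketch of how a second format would plug in. Nothing in it ships with this change: the Parquet plugin, its priority of 10, and the long-fields-only policy are illustrative assumptions; only `LuceneDataFormatPlugin` is real.

```java
import java.util.List;
import java.util.Set;

import org.opensearch.index.engine.dataformat.DataFormat;
import org.opensearch.index.engine.dataformat.DataFormatPlugin;
import org.opensearch.index.engine.dataformat.DataFormatRegistry;
import org.opensearch.index.engine.dataformat.LuceneDataFormatPlugin;
import org.opensearch.index.mapper.MappedFieldType;

final class RoutingSketch {

    /** Hypothetical columnar plugin; not part of this change. */
    static final class ParquetDataFormatPlugin implements DataFormatPlugin {
        private static final DataFormat PARQUET = new DataFormat(
            "parquet",
            Set.of(DataFormat.Capability.COLUMNAR_STORAGE, DataFormat.Capability.DOC_VALUES),
            10 // outranks Lucene's universal fallback at priority 0
        );

        @Override
        public DataFormat getDataFormat() {
            return PARQUET;
        }

        @Override
        public boolean supportsField(MappedFieldType fieldType) {
            return "long".equals(fieldType.typeName()); // assumed routing policy
        }
    }

    static DataFormat route(MappedFieldType fieldType) {
        // Duplicate format names throw at construction; routing scans plugins
        // highest-priority first, so a long field lands in "parquet" and every
        // other field falls back to "lucene".
        DataFormatRegistry registry = new DataFormatRegistry(
            List.of(new LuceneDataFormatPlugin(), new ParquetDataFormatPlugin())
        );
        return registry.routeField(fieldType);
    }
}
```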
+ * + * @param mapperService the mapper service containing all field types + * @return immutable map of field name to DataFormat + */ + public Map<String, DataFormat> buildRoutingTable(MapperService mapperService) { + Map<String, DataFormat> table = new HashMap<>(); + for (MappedFieldType fieldType : mapperService.fieldTypes()) { + table.put(fieldType.name(), routeField(fieldType)); + } + return Collections.unmodifiableMap(table); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/dataformat/DocumentInput.java new file mode 100644 index 0000000000000..e4b60ffa5426c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/DocumentInput.java @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +import java.io.IOException; + +/** + * Represents a document input for adding fields and metadata to a writer. + * + * @param <T> the type of the final input representation + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface DocumentInput<T> extends AutoCloseable { + + /** + * Adds a row ID field to the document. + * + * @param fieldName the name of the row ID field + * @param rowId the row ID value + */ + void addRowIdField(String fieldName, long rowId); + + /** + * Adds a field to the document. + * + * @param fieldType the mapped field type + * @param value the field value + */ + void addField(MappedFieldType fieldType, Object value); + + /** + * Gets the final input representation. + * + * @return the final input of type T + */ + T getFinalInput(); + + /** + * Adds this document to the writer. + * + * @return the write result + * @throws IOException if an I/O error occurs + */ + WriteResult addToWriter() throws IOException; + + /** + * Sets the version for this document. + * + * @param version the version number + */ + default void setVersion(long version) { + // Default no-op implementation; override as needed + } + + /** + * Sets the sequence number for this document. + * + * @param seqNo the sequence number + */ + default void setSeqNo(long seqNo) { + // Default no-op implementation; override as needed + } + + /** + * Sets the primary term for this document. + * + * @param fieldName the field name + * @param primaryTerm the primary term + */ + default void setPrimaryTerm(String fieldName, long primaryTerm) { + // Default no-op implementation; override as needed + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/FileInfos.java b/server/src/main/java/org/opensearch/index/engine/dataformat/FileInfos.java new file mode 100644 index 0000000000000..577d695db4c52 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/FileInfos.java @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** + * Container for file information organized by data format. + * + * @opensearch.experimental + */ +@ExperimentalApi +public final class FileInfos { + + private final Map writerFilesMap; + + private FileInfos() { + this.writerFilesMap = new HashMap<>(); + } + + /** + * Gets an unmodifiable map of writer file sets by data format. + * + * @return the writer files map + */ + public Map getWriterFilesMap() { + return Collections.unmodifiableMap(writerFilesMap); + } + + private void putWriterFileSet(DataFormat format, WriterFileSet writerFileSet) { + writerFilesMap.put(format, writerFileSet); + } + + /** + * Gets the writer file set for a specific data format. + * + * @param format the data format + * @return an Optional containing the writer file set, or empty if not found + */ + public Optional getWriterFileSet(DataFormat format) { + return Optional.ofNullable(writerFilesMap.get(format)); + } + + /** + * Creates an empty FileInfos instance. + * + * @return an empty FileInfos + */ + public static FileInfos empty() { + return new FileInfos(); + } + + /** + * Creates a new builder for FileInfos. + * + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for constructing FileInfos instances. + * + * @opensearch.experimental + */ + @ExperimentalApi + public static final class Builder { + private final Map writerFilesMap = new HashMap<>(); + + /** + * Adds a writer file set for a specific data format. + * + * @param format the data format + * @param writerFileSet the writer file set + * @return this builder + */ + public Builder putWriterFileSet(DataFormat format, WriterFileSet writerFileSet) { + writerFilesMap.put(format, writerFileSet); + return this; + } + + /** + * Adds all entries from the provided map. + * + * @param map the map of data formats to writer file sets + * @return this builder + */ + public Builder putAll(Map map) { + writerFilesMap.putAll(map); + return this; + } + + /** + * Builds the FileInfos instance. + * + * @return a new FileInfos instance + */ + public FileInfos build() { + FileInfos fileInfos = new FileInfos(); + writerFilesMap.forEach(fileInfos::putWriterFileSet); + return fileInfos; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/IndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/dataformat/IndexingExecutionEngine.java new file mode 100644 index 0000000000000..aaf533703d928 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/IndexingExecutionEngine.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +/** + * Engine for executing indexing operations. 
+ */ +public class IndexingExecutionEngine {} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/LuceneDataFormatPlugin.java b/server/src/main/java/org/opensearch/index/engine/dataformat/LuceneDataFormatPlugin.java new file mode 100644 index 0000000000000..0521d4e580b96 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/LuceneDataFormatPlugin.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Set; + +/** + * Built-in Lucene data format plugin. Registered as the universal fallback at priority 0. + * Lucene supports all field types and provides full-text search, point ranges, doc values, and stored fields. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneDataFormatPlugin implements DataFormatPlugin { + + /** The Lucene data format name constant */ + public static final String NAME = DataFormat.LUCENE.name(); + + private static final DataFormat LUCENE = new DataFormat( + NAME, + Set.of( + DataFormat.Capability.FULL_TEXT_SEARCH, + DataFormat.Capability.POINT_RANGE, + DataFormat.Capability.DOC_VALUES, + DataFormat.Capability.STORED_FIELDS + ), + 0 // lowest priority — universal fallback + ); + + @Override + public DataFormat getDataFormat() { + return LUCENE; + } + + @Override + public boolean supportsField(MappedFieldType fieldType) { + return true; // Lucene handles all field types + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/MergeResult.java b/server/src/main/java/org/opensearch/index/engine/dataformat/MergeResult.java new file mode 100644 index 0000000000000..0ec90640367f9 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/MergeResult.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.util.Map; + +/** + * Result of a merge operation containing merged writer file sets. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class MergeResult { + + private final Map mergedWriterFileSet; + + /** + * Constructs a merge result with the given merged writer file sets. + * + * @param mergedWriterFileSet map of data formats to merged writer file sets + */ + public MergeResult(Map mergedWriterFileSet) { + this.mergedWriterFileSet = mergedWriterFileSet; + } + + /** + * Gets all merged writer file sets. + * + * @return map of data formats to merged writer file sets + */ + public Map getMergedWriterFileSet() { + return mergedWriterFileSet; + } + + /** + * Gets the merged writer file set for a specific data format. 
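Purely to show the shape of the merge contract, a degenerate `Merger` (the interface appears just below) that keeps only the first input set. It reads the raw `List` parameter as the stripped generic `List<WriterFileSet>`; a real implementation would rewrite files and remap row IDs rather than discard inputs.

```java
import java.util.List;
import java.util.Map;

import org.opensearch.index.engine.dataformat.DataFormat;
import org.opensearch.index.engine.dataformat.MergeResult;
import org.opensearch.index.engine.dataformat.Merger;
import org.opensearch.index.engine.exec.WriterFileSet;

final class KeepFirstMerger implements Merger {
    @Override
    public MergeResult merge(List<WriterFileSet> fileMetadataList, long writerGeneration) {
        // Keep the first set untouched; callers read the result back per format
        // via MergeResult#getMergedWriterFileSetForDataformat(DataFormat).
        WriterFileSet first = fileMetadataList.get(0);
        return new MergeResult(Map.of(DataFormat.LUCENE, first));
    }
}
```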
+ * + * @param dataFormat the data format + * @return the merged writer file set for the specified format + */ + public WriterFileSet getMergedWriterFileSetForDataformat(DataFormat dataFormat) { + return mergedWriterFileSet.get(dataFormat); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/Merger.java b/server/src/main/java/org/opensearch/index/engine/dataformat/Merger.java new file mode 100644 index 0000000000000..69a2516a2e30f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/Merger.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.util.List; + +/** + * Interface for merging multiple writer file sets into a single merged result. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface Merger { + /** + * Merges a list of writer file sets into a single merged result. + * + * @param fileMetadataList list of writer file sets to merge + * @param writerGeneration the writer generation number + * @return merge result containing row ID mapping and merged file metadata + */ + MergeResult merge(List fileMetadataList, long writerGeneration); +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/RefreshInput.java b/server/src/main/java/org/opensearch/index/engine/dataformat/RefreshInput.java new file mode 100644 index 0000000000000..d574bc33c77d3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/RefreshInput.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.util.ArrayList; +import java.util.List; + +/** + * Input data for a refresh operation, containing existing segments and writer files. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class RefreshInput { + + private List existingSegments; + private final List writerFiles; + + /** + * Constructs a new refresh input with empty lists. + */ + public RefreshInput() { + this.writerFiles = new ArrayList<>(); + this.existingSegments = new ArrayList<>(); + } + + /** + * Sets the existing segments. + * + * @param existingSegments the list of existing segments + */ + public void setExistingSegments(List existingSegments) { + this.existingSegments = existingSegments; + } + + /** + * Adds a writer file set to the refresh input. + * + * @param writerFileSetGroup the writer file set to add + */ + public void add(WriterFileSet writerFileSetGroup) { + this.writerFiles.add(writerFileSetGroup); + } + + /** + * Gets the list of writer files. + * + * @return the writer files list + */ + public List getWriterFiles() { + return writerFiles; + } + + /** + * Gets the list of existing segments. 
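A sketch of the flush-to-refresh handoff that `RefreshInput` models, assuming the `FileInfos` came from a `Writer#flush()` call and that the raw `Optional` returned by `FileInfos#getWriterFileSet` is the stripped `Optional<WriterFileSet>`; the element type of the existing-segments list is not visible in this diff, so it stays raw here.

```java
import java.util.List;

import org.opensearch.index.engine.dataformat.DataFormat;
import org.opensearch.index.engine.dataformat.FileInfos;
import org.opensearch.index.engine.dataformat.RefreshInput;

final class RefreshInputSketch {
    static RefreshInput prepare(FileInfos flushed, List currentSegments) {
        RefreshInput input = new RefreshInput();
        // Carry forward what is already searchable...
        input.setExistingSegments(currentSegments);
        // ...and add this flush's Lucene files; other formats are added the same way.
        flushed.getWriterFileSet(DataFormat.LUCENE).ifPresent(input::add);
        return input;
    }
}
```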
+ * + * @return the existing segments list + */ + public List getExistingSegments() { + return existingSegments; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/RefreshResult.java b/server/src/main/java/org/opensearch/index/engine/dataformat/RefreshResult.java new file mode 100644 index 0000000000000..ffb90564ce259 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/RefreshResult.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.ArrayList; +import java.util.List; + +/** + * Result of a refresh operation containing the refreshed segments. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class RefreshResult { + + private List refreshedSegments; + + /** + * Constructs a new refresh result with an empty list of segments. + */ + public RefreshResult() { + this.refreshedSegments = new ArrayList<>(); + } + + /** + * Gets the list of refreshed segments. + * + * @return the refreshed segments list + */ + public List getRefreshedSegments() { + return refreshedSegments; + } + + /** + * Sets the refreshed segments. + * + * @param refreshedSegments the list of refreshed segments + */ + public void setRefreshedSegments(List refreshedSegments) { + this.refreshedSegments = refreshedSegments; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/WriteResult.java b/server/src/main/java/org/opensearch/index/engine/dataformat/WriteResult.java new file mode 100644 index 0000000000000..21a79189b478b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/WriteResult.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; + +/** + * Result of a write operation. + * + * @param success whether the write was successful + * @param e the exception if the write failed, null otherwise + * @param version the document version + * @param term the primary term + * @param seqNo the sequence number + * + * @opensearch.experimental + */ +@ExperimentalApi +public record WriteResult(boolean success, Exception e, long version, long term, long seqNo) { +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/Writer.java b/server/src/main/java/org/opensearch/index/engine/dataformat/Writer.java new file mode 100644 index 0000000000000..bb9913b43e95a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/Writer.java @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.dataformat; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +/** + * Interface for writing documents to a data format. + * + * @param

<P> the type of document input + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface Writer<P extends DocumentInput<?>
> { + + /** + * Adds a document to the writer. + * + * @param d the document input + * @return the write result + * @throws IOException if an I/O error occurs + */ + WriteResult addDoc(P d) throws IOException; + + /** + * Flushes the writer and returns file information. + * + * @return the file information after flush + * @throws IOException if an I/O error occurs + */ + FileInfos flush() throws IOException; + + /** + * Synchronizes the writer to ensure data is persisted. + * + * @throws IOException if an I/O error occurs + */ + void sync() throws IOException; + + /** + * Closes the writer and releases resources. + */ + void close(); + + /** + * Creates a new document input instance. + * + * @return a new document input + */ + P newDocumentInput(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/dataformat/package-info.java b/server/src/main/java/org/opensearch/index/engine/dataformat/package-info.java new file mode 100644 index 0000000000000..6fda3c59ad4a0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/dataformat/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Classes for data format abstractions used by the indexing execution engine. */ +package org.opensearch.index.engine.dataformat; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java new file mode 100644 index 0000000000000..e4cc5137add8f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java @@ -0,0 +1,174 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.concurrent.AbstractRefCounted; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.index.engine.dataformat.DataFormat; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; + +/** + * Abstract base class representing a snapshot of the catalog state at a specific point in time. + * Maintains versioned information about segments, files, and metadata for index operations. + * Extends AbstractRefCounted to support reference counting for safe concurrent access. + * Subclasses must implement methods for accessing file metadata, segments, and user data. + */ +@ExperimentalApi +public abstract class CatalogSnapshot extends AbstractRefCounted implements Writeable, Cloneable { + + /** + * Key for storing catalog snapshot in user data. + */ + public static final String CATALOG_SNAPSHOT_KEY = "_catalog_snapshot_"; + /** + * Key for storing last composite writer generation in user data. + */ + public static final String LAST_COMPOSITE_WRITER_GEN_KEY = "_last_composite_writer_gen_"; + /** + * Key for storing catalog snapshot ID in user data. 
+ */ + public static final String CATALOG_SNAPSHOT_ID = "_id"; + + protected final long generation; + protected long version; + + public CatalogSnapshot(String name, long generation, long version) { + super(name); + this.generation = generation; + this.version = version; + } + + /** + * Constructs a CatalogSnapshot from a StreamInput for deserialization. + * + * @param in the stream input to read from + * @throws IOException if an I/O error occurs during deserialization + */ + public CatalogSnapshot(StreamInput in) throws IOException { + super("catalog_snapshot"); + this.generation = in.readLong(); + this.version = in.readLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeLong(generation); + out.writeLong(version); + } + + public long getGeneration() { + return generation; + } + + public long getVersion() { + return version; + } + + /** + * Retrieves all file metadata in this catalog snapshot. + * + * @return collection of FileMetadata objects + * @throws IOException if an I/O error occurs + */ + public abstract Collection getFileMetadataList() throws IOException; + + /** + * Gets user-defined metadata associated with this catalog snapshot. + * + * @return map of user data key-value pairs + */ + public abstract Map getUserData(); + + /** + * Gets the unique identifier for this catalog snapshot. + * + * @return the catalog snapshot ID + */ + public abstract long getId(); + + /** + * Gets all segments in this catalog snapshot. + * + * @return list of Segment objects + */ + public abstract List getSegments(); + + /** + * Retrieves searchable files for a typed DataFormat. + * + * @param dataFormat the DataFormat instance + * @return collection of WriterFileSet objects for the specified format + */ + public abstract Collection getSearchableFiles(DataFormat dataFormat); + + /** + * Gets all data formats present in this catalog snapshot. + * + * @return set of data format identifiers + */ + public abstract Set getDataFormats(); + + /** + * Gets the last writer generation number. + * + * @return the last writer generation + */ + public abstract long getLastWriterGeneration(); + + /** + * Serializes this catalog snapshot to a string representation. + * + * @return serialized string + * @throws IOException if an I/O error occurs + */ + public abstract String serializeToString() throws IOException; + + /** + * Sets the supplier for IndexFileDeleter. + * + * @param supplier the IndexFileDeleter supplier + */ + public abstract void setIndexFileDeleterSupplier(Supplier supplier); + + /** + * Sets the catalog snapshot map for tracking multiple snapshots. + * + * @param catalogSnapshotMap map of generation to catalog snapshots + */ + public abstract void setCatalogSnapshotMap(Map catalogSnapshotMap); + + /** + * Creates a clone without acquiring a reference count. + * Used for Lucene compatibility where clone is required. + * + * @return this catalog snapshot instance + */ + public CatalogSnapshot cloneNoAcquire() { + // Still using the clone call since Lucene call requires clone. This will allow a SegmentsInfos backed CatalogSnapshot to use the + // same method in calls. + return this; + } + + /** + * Sets user-defined metadata for this catalog snapshot. 
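A usage sketch for consuming a snapshot safely. It reads the raw `GatedCloseable` and `Collection` signatures as `GatedCloseable<CatalogSnapshot>` and `Collection<FileMetadata>` (matching their javadoc), and uses `Indexer#acquireSnapshot`, which this change introduces further down.

```java
import java.io.IOException;

import org.opensearch.common.concurrent.GatedCloseable;
import org.opensearch.index.engine.exec.CatalogSnapshot;
import org.opensearch.index.engine.exec.FileMetadata;
import org.opensearch.index.engine.exec.Indexer;

final class SnapshotSketch {
    static void listFiles(Indexer indexer) throws IOException {
        // The GatedCloseable gates the snapshot's ref count: the files listed here
        // cannot be deleted until close() runs when the try block exits.
        try (GatedCloseable<CatalogSnapshot> gated = indexer.acquireSnapshot()) {
            CatalogSnapshot snapshot = gated.get();
            for (FileMetadata fm : snapshot.getFileMetadataList()) {
                System.out.println(snapshot.getGeneration() + " -> " + fm.serialize());
            }
        }
    }
}
```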
+ * + * @param userData map of user data key-value pairs + * @param b additional boolean parameter for implementation-specific behavior + */ + public abstract void setUserData(Map userData, boolean b); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java new file mode 100644 index 0000000000000..7dafe285beb64 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; + +import java.util.Objects; + +/** + * Represents metadata for a file in the index, including its data format and filename. + * Files can be in different formats (e.g., "lucene", "metadata") and this class provides + * a unified way to represent and serialize file information across the system. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class FileMetadata { + + /** + * Delimiter used to separate filename and data format in serialized form. + */ + public static final String DELIMITER = ":::"; + private static final String METADATA_KEY = DataFormat.METADATA.name(); + + private final String file; + private final DataFormat dataFormat; + + /* Constructs a FileMetadata with explicit data format and filename. + * + * @param dataFormat the data format identifier (e.g., "lucene", "metadata") + * @param file the filename + */ + public FileMetadata(DataFormat format, String file) { + this.file = file; + this.dataFormat = format; + } + + /** + * Constructs a FileMetadata by parsing a serialized data-format-aware filename. + * The format is "filename:::dataFormat". If no delimiter is present and the filename + * starts with "metadata", it's treated as a metadata file. Otherwise, defaults to "lucene". + * + * @param dataFormatAwareFile the serialized filename with optional data format + */ + public FileMetadata(String dataFormatAwareFile) { + if (!dataFormatAwareFile.contains(DELIMITER) && dataFormatAwareFile.startsWith(METADATA_KEY)) { + this.dataFormat = DataFormat.METADATA; + this.file = dataFormatAwareFile; + return; + } + String[] parts = dataFormatAwareFile.split(DELIMITER); + this.dataFormat = (parts.length == 1) ? DataFormat.LUCENE : DataFormat.of(parts[1]); + this.file = parts[0]; + } + + /** + * Serializes this FileMetadata to a string in the format "filename:::dataFormat". + * + * @return the serialized representation + */ + public String serialize() { + return file + DELIMITER + dataFormat.name(); + } + + @Override + public String toString() { + return serialize(); + } + + /** + * Returns the filename. + * + * @return the filename + */ + public String file() { + return file; + } + + /** + * Returns the typed DataFormat instance. 
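The serialization and parsing rules above pin down as a few assertions taken directly from the constructor logic (run with `-ea` to check them):

```java
import org.opensearch.index.engine.dataformat.DataFormat;
import org.opensearch.index.engine.exec.FileMetadata;

final class FileMetadataSketch {
    public static void main(String[] args) {
        // serialize() always emits "filename:::format".
        assert "_0.cfs:::lucene".equals(new FileMetadata(DataFormat.LUCENE, "_0.cfs").serialize());

        // An explicit format after the delimiter wins...
        assert "parquet".equals(new FileMetadata("_0.data:::parquet").dataFormatType().name());
        // ...no delimiter defaults to the Lucene constant...
        assert DataFormat.LUCENE == new FileMetadata("_0.cfs").dataFormatType();
        // ...unless the bare name starts with "metadata".
        assert DataFormat.METADATA == new FileMetadata("metadata_0").dataFormatType();
    }
}
```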
+ * + * @return the DataFormat + */ + public DataFormat dataFormatType() { + return dataFormat; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + FileMetadata that = (FileMetadata) o; + return Objects.equals(file, that.file) && Objects.equals(dataFormat, that.dataFormat); + } + + @Override + public int hashCode() { + return Objects.hash(file, dataFormat); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileReferenceManager.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileReferenceManager.java new file mode 100644 index 0000000000000..353ebe6ec699f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileReferenceManager.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +/** + * Manages reference counting for index files to prevent premature deletion. + * Tracks which files are in use by catalog snapshots and ensures files are only + * deleted when no longer referenced by any snapshot. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexFileReferenceManager { + /** + * Adds file references for all files in the given catalog snapshot. + * Increments reference counts to prevent deletion while the snapshot is in use. + * + * @param snapshot the catalog snapshot whose files should be referenced + */ + void addFileReferences(CatalogSnapshot snapshot); + + /** + * Removes file references for all files in the given catalog snapshot. + * Decrements reference counts and may trigger deletion of unreferenced files. + * + * @param snapshot the catalog snapshot whose file references should be removed + */ + void removeFileReferences(CatalogSnapshot snapshot); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Indexer.java b/server/src/main/java/org/opensearch/index/engine/exec/Indexer.java new file mode 100644 index 0000000000000..f94a6af92bff5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/Indexer.java @@ -0,0 +1,195 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.IndexCommit; +import org.opensearch.ExceptionsHelper; +import org.opensearch.common.Nullable; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.LifecycleAware; +import org.opensearch.index.engine.SafeCommitInfo; +import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogManager; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; + +import static org.opensearch.index.engine.Engine.HISTORY_UUID_KEY; + +/** + * Unified interface for indexing operations in OpenSearch. + *

+ * This interface provides a complete abstraction for document indexing, combining:
+ * <ul>
+ *   <li>{@link IndexerEngineOperations} - Core CRUD operations (index, delete, no-op)</li>
+ *   <li>{@link IndexerStateManager} - Sequence numbers, checkpoints, and timestamps</li>
+ *   <li>{@link IndexerLifecycleOperations} - Flush, refresh, merge, and throttling</li>
+ *   <li>{@link IndexerStatistics} - Performance metrics and resource usage</li>
+ * </ul>
+ * <p>
+ * The interface is designed to support multiple implementations:
+ * <ul>
+ *   <li>Engine-backed indexing (traditional Lucene-based)</li>
+ *   <li>Alternative storage engines</li>
+ *   <li>Delegating or decorating implementations</li>
+ * </ul>
+ * <p>
+ * Key responsibilities:
+ * <ul>
+ *   <li>Document lifecycle management (create, update, delete)</li>
+ *   <li>Translog and durability guarantees</li>
+ *   <li>Replication and recovery support via sequence numbers</li>
+ *   <li>Search visibility through refresh operations</li>
+ *   <li>Resource management and cleanup</li>
+ * </ul>
+ * + * @see org.opensearch.index.engine.EngineBackedIndexer for the primary implementation + * @opensearch.experimental + */ +@ExperimentalApi +public interface Indexer + extends + LifecycleAware, + Closeable, + IndexerEngineOperations, + IndexerStateManager, + IndexerLifecycleOperations, + IndexerStatistics { + + /** + * Returns the engine configuration for this indexer. + * Contains settings for merge policy, translog, codec, and other engine parameters. + * + * @return the engine configuration + */ + EngineConfig config(); + + /** + * Returns information about the safe commit point. + * The safe commit represents a consistent state that can be used for recovery. + * + * @return safe commit information including local and global checkpoints + */ + SafeCommitInfo getSafeCommitInfo(); + + /** + * Returns the translog manager for this indexer. + * Provides access to the transaction log for durability and recovery. + * + * @return the translog manager + */ + TranslogManager translogManager(); + + /** + * Acquires a lock to prevent history pruning. + * Used during operations that need to read historical operations (e.g., recovery, replication). + * + * @return a closeable lock that must be released when history access is complete + */ + Closeable acquireHistoryRetentionLock(); + + /** + * Creates a snapshot of operations within the specified sequence number range. + * Used for replication and recovery to replay operations on other nodes. + * + * @param source description of why the snapshot is being created + * @param fromSeqNo starting sequence number (inclusive) + * @param toSeqNo ending sequence number (inclusive) + * @param requiredFullRange if true, fails if the full range is not available + * @param accurateCount if true, provides accurate operation count (may be slower) + * @return a snapshot of operations in the specified range + * @throws IOException if an I/O error occurs while creating the snapshot + */ + Translog.Snapshot newChangesSnapshot(String source, long fromSeqNo, long toSeqNo, boolean requiredFullRange, boolean accurateCount) + throws IOException; + + /** + * Returns the unique identifier for this index's history. + * Used to ensure replicas are synchronized with the correct primary. + * + * @return the history UUID + */ + String getHistoryUUID(); + + /** + * Flushes all pending changes and closes the indexer. + * Ensures all data is persisted before shutdown. + * + * @throws IOException if an I/O error occurs during flush or close + */ + void flushAndClose() throws IOException; + + /** + * Marks the engine as failed and prevents further operations. + * Called when an unrecoverable error occurs. + * + * @param reason description of why the engine failed + * @param failure the exception that caused the failure, or null + */ + void failEngine(String reason, @Nullable Exception failure); + + /** + * Acquires a snapshot of the current catalog state. + * The snapshot contains segment metadata and file information for replication. + * + * @return a gated closeable wrapping the catalog snapshot + */ + GatedCloseable acquireSnapshot(); + + /** + * Checks if the throwable contains a fatal error and throws it if present. + * Fatal errors (like OutOfMemoryError) should not be caught and must propagate + * to the uncaught exception handler. 
+ * + * @param logger the logger to use for error messages + * @param maybeMessage the message to log if a fatal error is found + * @param maybeFatal the throwable to check for fatal errors + */ + @SuppressWarnings("finally") + default void maybeDie(final Logger logger, final String maybeMessage, final Throwable maybeFatal) { + ExceptionsHelper.maybeError(maybeFatal).ifPresent(error -> { + try { + logger.error(maybeMessage, error); + } finally { + throw error; + } + }); + } + + /** + * Reads the history UUID from commit user data. + * The history UUID identifies the lineage of operations in this index. + * + * @param commitData the commit user data map + * @return the history UUID + * @throws IllegalStateException if the commit data doesn't contain a history UUID + */ + default String loadHistoryUUID(Map commitData) { + final String uuid = commitData.get(HISTORY_UUID_KEY); + if (uuid == null) { + throw new IllegalStateException("commit doesn't contain history uuid"); + } + return uuid; + } + + /** + * Acquires a safe index commit for snapshot or recovery operations. + * The commit is guaranteed to be consistent and will not be deleted while held. + * + * @return a gated closeable wrapping the index commit + * @throws EngineException if acquiring the commit fails + */ + GatedCloseable acquireSafeIndexCommit() throws EngineException; + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexerEngineOperations.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexerEngineOperations.java new file mode 100644 index 0000000000000..c5895095f63b5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexerEngineOperations.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.VersionType; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.mapper.DocumentMapperForType; +import org.opensearch.index.mapper.SourceToParse; + +import java.io.IOException; + +/** + * Core document operations for the indexer. + * Defines the fundamental CRUD operations and history management for document indexing. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexerEngineOperations { + + /** + * Performs a document index operation. + * + * @param index the index operation containing document data and metadata + * @return result containing updated translog location, version, and document-specific failures + * @throws IOException if an I/O error occurs during indexing + */ + Engine.IndexResult index(Engine.Index index) throws IOException; + + /** + * Performs a document delete operation. + * + * @param delete the delete operation containing document identifier and metadata + * @return result containing updated translog location, version, and document-specific failures + * @throws IOException if an I/O error occurs during deletion + */ + Engine.DeleteResult delete(Engine.Delete delete) throws IOException; + + /** + * Performs a no-op operation for sequence number gap filling. + * Used to maintain sequence number continuity in the translog. 
+ * + * @param noOp the no-op operation + * @return result of the no-op operation + * @throws IOException if an I/O error occurs + */ + Engine.NoOpResult noOp(Engine.NoOp noOp) throws IOException; + + /** + * Prepares an index operation by parsing the source document and creating an Engine.Index operation. + * This method validates and transforms the source into an executable index operation. + * + * @param docMapper the document mapper for field mapping and validation + * @param source the source document to parse + * @param seqNo the sequence number for this operation + * @param primaryTerm the primary term + * @param version the document version + * @param versionType the version type for conflict resolution + * @param origin the operation origin (primary, replica, local, peer recovery) + * @param autoGeneratedIdTimestamp timestamp for auto-generated document IDs + * @param isRetry whether this is a retry of a previous operation + * @param ifSeqNo conditional sequence number for optimistic concurrency control + * @param ifPrimaryTerm conditional primary term for optimistic concurrency control + * @return the prepared index operation ready for execution + */ + Engine.Index prepareIndex( + DocumentMapperForType docMapper, + SourceToParse source, + long seqNo, + long primaryTerm, + long version, + VersionType versionType, + Engine.Operation.Origin origin, + long autoGeneratedIdTimestamp, + boolean isRetry, + long ifSeqNo, + long ifPrimaryTerm + ); + + /** + * Prepares a delete operation by creating an Engine.Delete operation. + * + * @param id the document identifier + * @param seqNo the sequence number for this operation + * @param primaryTerm the primary term + * @param version the document version + * @param versionType the version type for conflict resolution + * @param origin the operation origin (primary, replica, local, peer recovery) + * @param ifSeqNo conditional sequence number for optimistic concurrency control + * @param ifPrimaryTerm conditional primary term for optimistic concurrency control + * @return the prepared delete operation ready for execution + */ + Engine.Delete prepareDelete( + String id, + long seqNo, + long primaryTerm, + long version, + VersionType versionType, + Engine.Operation.Origin origin, + long ifSeqNo, + long ifPrimaryTerm + ); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexerLifecycleOperations.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexerLifecycleOperations.java new file mode 100644 index 0000000000000..162d84dbd7e6a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexerLifecycleOperations.java @@ -0,0 +1,176 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.Segment; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.util.List; + +/** + * Lifecycle operations for the indexer including flush, refresh, merge, and throttling. + * Manages the lifecycle of indexing operations from buffering to persistence and searchability. 
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexerLifecycleOperations { + + /** + * Makes recently indexed documents searchable by creating new segments from buffered data. + * This operation does not guarantee durability; use flush for persistence. + * + * @param source description of what triggered the refresh (e.g., "api", "schedule") + * @throws EngineException if the refresh operation fails + */ + void refresh(String source) throws EngineException; + + /** + * Commits buffered data to disk, ensuring durability of indexed documents. + * + * @param force if true, forces a flush even if not needed + * @param waitIfOngoing if true, waits for any ongoing flush to complete; if false, returns immediately + * @throws EngineException if the flush operation fails + */ + void flush(boolean force, boolean waitIfOngoing); + + /** + * Commits buffered data to disk with default behavior (non-forced, waits if ongoing). + * + * @throws EngineException if the flush operation fails + */ + void flush(); + + /** + * Checks whether a periodic flush should be triggered based on translog size and age thresholds. + * + * @return true if a flush should be performed, false otherwise + */ + boolean shouldPeriodicallyFlush(); + + /** + * Writes the active indexing buffer to disk without committing. + * Used to free memory while keeping data in the translog. + * + * @throws EngineException if writing the buffer fails + */ + void writeIndexingBuffer() throws EngineException; + + /** + * Performs a force merge operation to reduce the number of segments. + * This is an expensive operation that should be used sparingly. + * + * @param flush whether to flush after the merge + * @param maxNumSegments target number of segments (1 for full merge) + * @param onlyExpungeDeletes if true, only merges segments with deletions + * @param upgrade if true, upgrades segments to the current format + * @param upgradeOnlyAncientSegments if true, only upgrades old format segments + * @param forceMergeUUID unique identifier for this force merge operation + * @throws EngineException if the merge operation fails + * @throws IOException if an I/O error occurs during merging + */ + void forceMerge( + boolean flush, + int maxNumSegments, + boolean onlyExpungeDeletes, + boolean upgrade, + boolean upgradeOnlyAncientSegments, + String forceMergeUUID + ) throws EngineException, IOException; + + /** + * Returns information about all segments in the index. + * + * @param verbose if true, includes detailed segment information + * @return list of segment metadata + */ + List segments(boolean verbose); + + /** + * Returns the amount of RAM currently used by the indexing buffer. + * + * @return RAM usage in bytes + */ + long getIndexBufferRAMBytesUsed(); + + /** + * Activates indexing throttling to limit indexing to one thread. + * Used to reduce load during recovery or when the system is under pressure. + * Must be matched by a call to {@link #deactivateThrottling()}. + */ + void activateThrottling(); + + /** + * Deactivates indexing throttling, allowing normal indexing concurrency. + * Must be called after {@link #activateThrottling()}. + */ + void deactivateThrottling(); + + /** + * Checks whether indexing is currently throttled. + * + * @return true if throttling is active, false otherwise + */ + boolean isThrottled(); + + /** + * Applies changes to translog and soft-deletes retention settings. 
+ * + * @param translogRetentionAge maximum age of translog files to retain + * @param translogRetentionSize maximum size of translog files to retain + * @param softDeletesRetentionOps number of soft-deleted operations to retain + */ + void onSettingsChanged(TimeValue translogRetentionAge, ByteSizeValue translogRetentionSize, long softDeletesRetentionOps); + + /** + * Finalizes replication by applying catalog snapshot changes. + * Used in segment replication to apply received segment information. + * + * @param catalogSnapshot the catalog snapshot containing segment metadata + * @param shardPath the shard path where segments are located + * @throws IOException if finalization fails + */ + default void finalizeReplication(CatalogSnapshot catalogSnapshot, ShardPath shardPath) throws IOException { + // No-op by default + } + + /** + * Checks whether a refresh is needed based on buffered operations. + * + * @return true if a refresh should be performed, false otherwise + */ + boolean refreshNeeded(); + + /** + * Performs a refresh if needed based on internal heuristics. + * + * @param source description of what triggered the refresh check + * @return true if a refresh was performed, false otherwise + */ + boolean maybeRefresh(String source); + + /** + * Prunes deleted documents if the deletion ratio exceeds configured thresholds. + * This helps reclaim disk space by removing tombstones. + */ + void maybePruneDeletes(); + + /** + * Verifies the engine is in a valid state before closing the index. + * Ensures no ongoing operations that would prevent safe closure. + * + * @throws IllegalStateException if the engine is not in a valid state for closing + */ + void verifyEngineBeforeIndexClosing() throws IllegalStateException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexerStateManager.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexerStateManager.java new file mode 100644 index 0000000000000..7c610e8b3b589 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexerStateManager.java @@ -0,0 +1,163 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.seqno.SeqNoStats; + +import java.io.IOException; + +/** + * State management for sequence numbers, checkpoints, and timestamps. + * Manages the critical state information required for replication, recovery, and optimistic concurrency control. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexerStateManager { + + /** + * Returns the maximum auto-generated ID timestamp seen by this indexer. + * Used to optimize append-only operations by avoiding version lookups when safe. + * + * @return the maximum auto-generated ID timestamp in milliseconds + */ + long getMaxSeenAutoIdTimestamp(); + + /** + * Advances the max unsafe auto-generated ID timestamp marker. + * Disables append-only optimization for operations with timestamps at or below the specified value. + * + * @param newTimestamp the new timestamp threshold in milliseconds + */ + void updateMaxUnsafeAutoIdTimestamp(long newTimestamp); + + /** + * Returns the maximum sequence number of update or delete operations processed. 
+ * Used for the MSU (max_seq_no_of_updates_or_deletes) optimization to safely convert updates to appends. + *
+ * An index request is considered an update if it overwrites an existing document with the same ID. + * This marker helps replicas determine when they can safely use addDocument instead of updateDocument. + * + * @return the maximum sequence number of updates or deletes + * @see #advanceMaxSeqNoOfUpdatesOrDeletes(long) + */ + long getMaxSeqNoOfUpdatesOrDeletes(); + + /** + * Advances the max sequence number of updates or deletes marker. + * Called by replicas when receiving this value from the primary shard. + * + * @param maxSeqNoOfUpdatesOnPrimary the max sequence number from the primary + */ + void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary); + + /** + * Returns the timestamp of the last write operation in nanoseconds. + * This value is monotonically increasing and used for tracking write activity. + * + * @return the last write timestamp in nanoseconds + */ + long getLastWriteNanos(); + + /** + * Returns the local checkpoint that has been persisted to disk. + * All operations up to and including this sequence number are durably stored. + * + * @return the persisted local checkpoint + */ + long getPersistedLocalCheckpoint(); + + /** + * Returns the local checkpoint that has been processed but not necessarily persisted. + * This may be ahead of the persisted checkpoint if operations are buffered. + * + * @return the processed local checkpoint + * @see #getPersistedLocalCheckpoint() + */ + long getProcessedLocalCheckpoint(); + + /** + * Returns sequence number statistics for this indexer. + * + * @param globalCheckpoint the global checkpoint to use in the statistics + * @return sequence number statistics including max seq no, local checkpoint, and global checkpoint + */ + SeqNoStats getSeqNoStats(long globalCheckpoint); + + /** + * Returns the latest global checkpoint that has been synced to the translog. + * This represents the sequence number up to which all shards have acknowledged. + * + * @return the last synced global checkpoint + */ + long getLastSyncedGlobalCheckpoint(); + + /** + * Returns the minimum sequence number that must be retained for recovery and replication. + * Operations below this sequence number may be pruned. + * + * @return the minimum retained sequence number + */ + long getMinRetainedSeqNo(); + + /** + * Returns the checkpoint of the last completed refresh operation. + * Used to track which operations have been made searchable. + * + * @return the last refreshed checkpoint + * @throws UnsupportedOperationException if not implemented + */ + default long lastRefreshedCheckpoint() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the checkpoint of the currently ongoing refresh operation. + * Returns -1 if no refresh is in progress. + * + * @return the current ongoing refresh checkpoint, or -1 if none + * @throws UnsupportedOperationException if not implemented + */ + default long currentOngoingRefreshCheckpoint() { + throw new UnsupportedOperationException(); + } + + /** + * Counts the number of operations in the translog history within the specified sequence number range. + * Used for recovery and replication to determine the amount of history available. 
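The processed/persisted distinction above is the crux of the state manager: the processed checkpoint may run ahead of the persisted one while operations await a translog sync, never the reverse. A small sketch of reading that state, assuming the global checkpoint comes from the replication tracker:

    import org.opensearch.index.engine.exec.IndexerStateManager;
    import org.opensearch.index.seqno.SeqNoStats;

    class CheckpointSketch {
        static SeqNoStats snapshotState(IndexerStateManager state, long globalCheckpoint) {
            long processed = state.getProcessedLocalCheckpoint(); // applied in memory
            long persisted = state.getPersistedLocalCheckpoint(); // durably synced
            assert persisted <= processed : persisted + " > " + processed;
            return state.getSeqNoStats(globalCheckpoint);
        }
    }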
+ * + * @param source description of why the count is being requested + * @param fromSeqNo starting sequence number (inclusive) + * @param toSeqNumber ending sequence number (inclusive) + * @return the number of operations in the specified range + * @throws IOException if an I/O error occurs while reading history + */ + int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException; + + /** + * Checks whether complete operation history exists from the specified sequence number. + * Returns true if all operations from startingSeqNo onwards are available in the translog. + * + * @param reason description of why the check is being performed + * @param startingSeqNo the sequence number from which complete history is required + * @return true if complete history exists from startingSeqNo, false otherwise + */ + boolean hasCompleteOperationHistory(String reason, long startingSeqNo); + + /** + * Fills sequence number gaps with no-op operations to maintain continuity. + * This ensures the local checkpoint can advance past gaps in the sequence number history. + * + * @param primaryTerm the primary term for the no-op operations + * @return the number of no-op operations added + * @throws IOException if an I/O error occurs while filling gaps + */ + int fillSeqNoGaps(long primaryTerm) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexerStatistics.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexerStatistics.java new file mode 100644 index 0000000000000..c654ea830ba2b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexerStatistics.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.CommitStats; +import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.shard.DocsStats; +import org.opensearch.indices.pollingingest.PollingIngestStats; +import org.opensearch.search.suggest.completion.CompletionStats; + +/** + * Statistics and metrics for the indexer. + * Provides access to various performance metrics, resource usage, and operational statistics. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexerStatistics { + + /** + * Returns statistics about index commits. + * + * @return commit statistics including generation and user data + */ + CommitStats commitStats(); + + /** + * Returns document-level statistics. + * + * @return statistics including document count, deleted documents, and total size + */ + DocsStats docStats(); + + /** + * Returns segment-level statistics. + * + * @param includeSegmentFileSizes if true, includes individual file sizes + * @param includeUnloadedSegments if true, includes segments not currently loaded + * @return segment statistics including count, memory usage, and file information + */ + SegmentsStats segmentsStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments); + + /** + * Returns completion suggester statistics for specified field patterns. 
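These history APIs drive the operations-based versus file-based recovery decision: if complete history exists from the target's checkpoint, operations can be replayed instead of copying segment files. A hedged sketch of that branch; the reason strings and the Long.MAX_VALUE upper bound are illustrative assumptions:

    import java.io.IOException;

    import org.opensearch.index.engine.exec.IndexerStateManager;

    class RecoveryPlanningSketch {
        static boolean canUseOpsBasedRecovery(IndexerStateManager state, long startingSeqNo) throws IOException {
            if (state.hasCompleteOperationHistory("peer-recovery", startingSeqNo) == false) {
                return false; // history was pruned; fall back to copying segment files
            }
            // Estimate how many operations a replay would involve.
            int ops = state.countNumberOfHistoryOperations("peer-recovery", startingSeqNo, Long.MAX_VALUE);
            return ops >= 0;
        }
    }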
+ * + * @param fieldNamePatterns field name patterns to match (supports wildcards) + * @return completion statistics including memory usage per field + */ + CompletionStats completionStats(String... fieldNamePatterns); + + /** + * Returns statistics for pull-based ingestion operations. + * + * @return polling ingestion statistics + */ + PollingIngestStats pollingIngestStats(); + + /** + * Returns merge operation statistics. + * + * @return merge statistics including total merges, time, and data volume + */ + MergeStats getMergeStats(); + + /** + * Returns the total time spent under indexing throttling. + * + * @return throttle time in milliseconds + */ + long getIndexThrottleTimeInMillis(); + + /** + * Returns the current amount of data being written to disk. + * + * @return bytes currently being written + */ + long getWritingBytes(); + + /** + * Returns the number of unreferenced file cleanup operations performed. + * Used to track garbage collection of unused index files. + * + * @return count of cleanup operations + */ + long unreferencedFileCleanUpsPerformed(); + + /** + * Returns the amount of native memory used by the indexer. + * This includes off-heap memory used by native components. + * + * @return native memory usage in bytes + */ + default long getNativeBytesUsed() { + return 0; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SegmentInfosCatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/SegmentInfosCatalogSnapshot.java new file mode 100644 index 0000000000000..04ddcf290385f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SegmentInfosCatalogSnapshot.java @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.store.BufferedChecksumIndexInput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.ByteBuffersIndexOutput; +import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.index.engine.dataformat.DataFormat; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Concrete implementation of CatalogSnapshot backed by Lucene's SegmentInfos. + * Provides a lightweight snapshot view of segment metadata without supporting + * the full catalog snapshot functionality. Used primarily for compatibility + * with Lucene-based operations. + *
+ * Note: This implementation throws UnsupportedOperationException for several + * catalog-specific methods as it's designed for basic segment information access only. + * + * @opensearch.experimental + */ +public class SegmentInfosCatalogSnapshot extends CatalogSnapshot { + + private static final String CATALOG_SNAPSHOT_KEY = "_segment_infos_catalog_snapshot_"; + + private final SegmentInfos segmentInfos; + + /** + * Constructs a catalog snapshot from Lucene SegmentInfos. + * + * @param segmentInfos the Lucene segment information + */ + public SegmentInfosCatalogSnapshot(SegmentInfos segmentInfos) { + super(CATALOG_SNAPSHOT_KEY + segmentInfos.getGeneration(), segmentInfos.getGeneration(), segmentInfos.getVersion()); + this.segmentInfos = segmentInfos; + } + + /** + * Constructs a SegmentInfosCatalogSnapshot from a StreamInput for deserialization. + * + * @param in the stream input to read from + * @throws IOException if an I/O error occurs during deserialization + */ + public SegmentInfosCatalogSnapshot(StreamInput in) throws IOException { + super(in); + byte[] segmentInfosBytes = in.readByteArray(); + this.segmentInfos = SegmentInfos.readCommit( + null, + new BufferedChecksumIndexInput(new ByteArrayIndexInput("SegmentInfos", segmentInfosBytes)), + 0L + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + ByteBuffersDataOutput buffer = new ByteBuffersDataOutput(); + try (ByteBuffersIndexOutput indexOutput = new ByteBuffersIndexOutput(buffer, "", null)) { + segmentInfos.write(indexOutput); + } + out.writeByteArray(buffer.toArrayCopy()); + } + + @Override + public Collection getFileMetadataList() throws IOException { + return segmentInfos.files(true).stream().map(file -> new FileMetadata(DataFormat.LUCENE, file)).collect(Collectors.toList()); + } + + /** + * Returns the underlying Lucene SegmentInfos. 
+ * + * @return the segment information + */ + public SegmentInfos getSegmentInfos() { + return segmentInfos; + } + + @Override + public Map getUserData() { + return segmentInfos.getUserData(); + } + + @Override + public long getId() { + return generation; + } + + @Override + public List getSegments() { + throw new UnsupportedOperationException("SegmentInfosCatalogSnapshot does not support getSegments()"); + } + + @Override + public Collection getSearchableFiles(DataFormat dataFormat) { + throw new UnsupportedOperationException("SegmentInfosCatalogSnapshot does not support getSearchableFiles()"); + } + + @Override + public Set getDataFormats() { + throw new UnsupportedOperationException("SegmentInfosCatalogSnapshot does not support getDataFormats()"); + } + + @Override + public long getLastWriterGeneration() { + return -1; + } + + @Override + public String serializeToString() throws IOException { + throw new UnsupportedOperationException("SegmentInfosCatalogSnapshot does not support serializeToString()"); + } + + @Override + public void setIndexFileDeleterSupplier(Supplier supplier) { + // No-op for SegmentInfosCatalogSnapshot + } + + @Override + public void setCatalogSnapshotMap(Map catalogSnapshotMap) { + // No-op for SegmentInfosCatalogSnapshot + } + + @Override + public SegmentInfosCatalogSnapshot clone() { + return new SegmentInfosCatalogSnapshot(segmentInfos); + } + + @Override + protected void closeInternal() { + // TODO no op since SegmentInfosCatalogSnapshot is not refcounted + } + + @Override + public void setUserData(Map userData, boolean b) { + // TODO no op since SegmentInfosCatalogSnapshot is not refcounted + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java new file mode 100644 index 0000000000000..ab24f8611225d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java @@ -0,0 +1,191 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Set; + +/** + * Represents a set of files produced by a writer during indexing operations. + * Groups files by directory and writer generation, tracking metadata such as row count and total size. + * This class is serializable and can be transmitted across nodes. 
+ */ +@ExperimentalApi +public class WriterFileSet implements Serializable, Writeable { + + private final String directory; + private final long writerGeneration; + private final Set files; + private final long numRows; + + public WriterFileSet(Path directory, long writerGeneration, long numRows) { + this.numRows = numRows; + this.files = new HashSet<>(); + this.writerGeneration = writerGeneration; + this.directory = directory.toString(); + } + + public WriterFileSet(StreamInput in) throws IOException { + this.directory = in.readString(); + this.writerGeneration = in.readLong(); + this.numRows = in.readVInt(); + + int fileCount = in.readVInt(); + this.files = new HashSet<>(fileCount); + for (int i = 0; i < fileCount; i++) { + this.files.add(in.readString()); + } + } + + /** + * Creates a new WriterFileSet with a different directory path while preserving other attributes. + * + * @param newDirectory the new directory path + * @return a new WriterFileSet instance with the updated directory + */ + public WriterFileSet withDirectory(String newDirectory) { + return WriterFileSet.builder() + .directory(Path.of(newDirectory)) + .writerGeneration(this.writerGeneration) + .addFiles(this.files) + .build(); + } + + /** + * Serializes this WriterFileSet to StreamOutput. + * + * @param out the stream output to write to + * @throws IOException if an I/O error occurs during serialization + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(directory); + out.writeLong(writerGeneration); + out.writeVInt((int) numRows); + out.writeVInt(files.size()); + for (String file : files) { + out.writeString(file); + } + } + + public void add(String file) { + this.files.add(file); + } + + public Set getFiles() { + return files; + } + + public String getDirectory() { + return directory; + } + + public long getNumRows() { + return numRows; + } + + public long getTotalSize() { + return files.stream().mapToLong(file -> { + try { + return java.nio.file.Files.size(Path.of(directory, file)); + } catch (IOException e) { + return 0; + } + }).sum(); + } + + public long getWriterGeneration() { + return writerGeneration; + } + + @Override + public String toString() { + return "WriterFileSet{" + "directory=" + directory + ", writerGeneration=" + writerGeneration + ", files=" + files + '}'; + } + + @Override + public boolean equals(Object o) { + WriterFileSet other = (WriterFileSet) o; + return this.directory.equals(other.directory) + && this.files.equals(other.files) + && this.getWriterGeneration() == other.getWriterGeneration(); + } + + @Override + public int hashCode() { + return this.directory.hashCode() + this.files.hashCode(); + } + + /** + * Creates a new builder for constructing WriterFileSet instances. + * + * @return a new Builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for constructing WriterFileSet instances with fluent API. 
+ */ + @ExperimentalApi + public static class Builder { + private Path directory; + private Long writerGeneration; + private long numRows; + private final Set files = new HashSet<>(); + + public Builder directory(Path directory) { + this.directory = directory; + return this; + } + + public Builder writerGeneration(long writerGeneration) { + this.writerGeneration = writerGeneration; + return this; + } + + public Builder addFile(String file) { + this.files.add(file); + return this; + } + + public Builder addFiles(Set files) { + this.files.addAll(files); + return this; + } + + public Builder addNumRows(long numRows) { + this.numRows = numRows; + return this; + } + + public WriterFileSet build() { + if (directory == null) { + throw new IllegalStateException("directory must be set"); + } + + if (writerGeneration == null) { + throw new IllegalStateException("writerGeneration must be set"); + } + + WriterFileSet fileSet = new WriterFileSet(directory, writerGeneration, numRows); + fileSet.files.addAll(this.files); + return fileSet; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/Segment.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/Segment.java new file mode 100644 index 0000000000000..130b8b168ea27 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/Segment.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.coord; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.FileMetadata; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Represents a segment in the catalog snapshot containing files grouped by data format. + * Each segment has a unique generation number and maintains searchable files organized by their data format type. + * This class is serializable and can be transmitted across nodes for replication and recovery operations. + */ +@ExperimentalApi +public class Segment implements Serializable, Writeable { + + private final long generation; + private final Map dfGroupedSearchableFiles; + + public Segment(long generation) { + this.dfGroupedSearchableFiles = new HashMap<>(); + this.generation = generation; + } + + /** + * Constructs a Segment from a StreamInput for deserialization. + * + * @param in the stream input to read from + * @throws IOException if an I/O error occurs during deserialization + */ + public Segment(StreamInput in) throws IOException { + this.generation = in.readLong(); + this.dfGroupedSearchableFiles = new HashMap<>(); + int mapSize = in.readVInt(); + for (int i = 0; i < mapSize; i++) { + DataFormat dataFormat = DataFormat.of(in.readString()); + WriterFileSet writerFileSet = new WriterFileSet(in); + dfGroupedSearchableFiles.put(dataFormat, writerFileSet); + } + } + + /** + * Adds searchable files for a DataFormat to this segment. 
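Because WriterFileSet is Writeable, it can round-trip through the transport layer. A sketch of building one with the fluent builder and serializing it; BytesStreamOutput is the usual in-memory stream for this, and the directory and file names are hypothetical:

    import java.io.IOException;
    import java.nio.file.Path;

    import org.opensearch.common.io.stream.BytesStreamOutput;
    import org.opensearch.core.common.io.stream.StreamInput;
    import org.opensearch.index.engine.exec.WriterFileSet;

    class WriterFileSetRoundTrip {
        static WriterFileSet roundTrip() throws IOException {
            WriterFileSet original = WriterFileSet.builder()
                .directory(Path.of("/tmp/shard0")) // hypothetical shard directory
                .writerGeneration(3L)
                .addFile("_3.cfs")
                .addFile("_3.cfe")
                .addNumRows(1000L)
                .build();
            try (BytesStreamOutput out = new BytesStreamOutput()) {
                original.writeTo(out);
                try (StreamInput in = out.bytes().streamInput()) {
                    return new WriterFileSet(in); // restores directory, generation, row count, and files
                }
            }
        }
    }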
+ * + * @param dataFormat the DataFormat instance + * @param writerFileSetGroup the set of files for this data format + */ + public void addSearchableFiles(DataFormat dataFormat, WriterFileSet writerFileSetGroup) { + dfGroupedSearchableFiles.put(dataFormat, writerFileSetGroup); + } + + public Map getDFGroupedSearchableFiles() { + return dfGroupedSearchableFiles; + } + + /** + * Retrieves searchable files for a DataFormat. + * + * @param dataFormat the DataFormat instance + * @return collection of FileMetadata for the specified data format + */ + public Collection getSearchableFiles(DataFormat dataFormat) { + List searchableFiles = new ArrayList<>(); + WriterFileSet fileSet = dfGroupedSearchableFiles.get(dataFormat); + if (fileSet != null) { + for (String file : fileSet.getFiles()) { + searchableFiles.add(new FileMetadata(dataFormat, file)); + } + } + return searchableFiles; + } + + public long getGeneration() { + return generation; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeLong(generation); + out.writeVInt(dfGroupedSearchableFiles.size()); + for (Map.Entry entry : dfGroupedSearchableFiles.entrySet()) { + out.writeString(entry.getKey().name()); + entry.getValue().writeTo(out); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/package-info.java b/server/src/main/java/org/opensearch/index/engine/exec/package-info.java new file mode 100644 index 0000000000000..f9fb66ed51aeb --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Classes for indexer execution, lifecycle management, and segment operations within the engine. */ +package org.opensearch.index.engine.exec; diff --git a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java index a1c052759ac14..8a5c47e22002e 100644 --- a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java @@ -340,6 +340,7 @@ static Query blendTerms( // best effort: add clauses that are not term queries so that they have an opportunity to match // however their score contribution will be different // TODO: can we improve this? 
+ return new DisjunctionMaxQuery(queries, tieBreaker); } } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 9d1ca272495e3..db0a7335973cf 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -131,12 +131,12 @@ import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.Engine.GetResult; +import org.opensearch.index.engine.EngineBackedIndexer; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineException; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.IngestionEngine; -import org.opensearch.index.engine.InternalEngine; import org.opensearch.index.engine.MergedSegmentWarmerFactory; import org.opensearch.index.engine.NRTReplicationEngine; import org.opensearch.index.engine.ReadOnlyEngine; @@ -144,6 +144,7 @@ import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.Segment; import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.ShardFieldData; import org.opensearch.index.flush.FlushStats; @@ -314,7 +315,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private final Object postRecoveryMutex = new Object(); private volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm private final Object engineMutex = new Object(); // lock ordering: engineMutex -> mutex - private final AtomicReference currentEngineReference = new AtomicReference<>(); + private final AtomicReference currentEngineReference = new AtomicReference<>(); final EngineFactory engineFactory; final EngineConfigFactory engineConfigFactory; @@ -461,7 +462,7 @@ public IndexShard( this.translogSyncProcessor = createTranslogSyncProcessor( logger, threadPool, - this::getEngine, + this::getIndexer, indexSettings.isAssignedOnRemoteNode(), () -> getRemoteTranslogUploadBufferInterval(remoteStoreSettings::getClusterRemoteTranslogBufferInterval) ); @@ -532,7 +533,7 @@ public boolean shouldCache(Query query) { final DirectoryReader wrappedReader = indexReaderWrapper == null ? reader : indexReaderWrapper.apply(reader); return DerivedSourceDirectoryReader.wrap( wrappedReader, - getEngine().config().getDocumentMapperForTypeSupplier().get().getDocumentMapper().root()::deriveSource + getIndexer().config().getDocumentMapperForTypeSupplier().get().getDocumentMapper().root()::deriveSource ); }; } else { @@ -884,7 +885,7 @@ public void updateShardState( * primary/replica re-sync completes successfully and we are now being promoted, we have to restore * the reverted operations on this shard by replaying the translog to avoid losing acknowledged writes. */ - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.translogManager() .restoreLocalHistoryFromTranslog( engine.getProcessedLocalCheckpoint(), @@ -1020,7 +1021,7 @@ public void relocated( } // Ensure all in-flight remote store translog upload drains, before we perform the performSegRep. 
- releasablesOnHandoffFailures.add(getEngine().translogManager().drainSync()); + releasablesOnHandoffFailures.add(getIndexer().translogManager().drainSync()); // no shard operation permits are being held here, move state from started to relocated assert indexShardOperationPermits.getActiveOperationsCount() == OPERATIONS_BLOCKED @@ -1131,7 +1132,7 @@ public Engine.IndexResult applyIndexOperationOnPrimary( ) throws IOException { assert versionType.validateVersionForWrites(version); return applyIndexOperation( - getEngine(), + getIndexer(), UNASSIGNED_SEQ_NO, getOperationPrimaryTerm(), version, @@ -1156,7 +1157,7 @@ public Engine.IndexResult applyIndexOperationOnReplica( SourceToParse sourceToParse ) throws IOException { return applyIndexOperation( - getEngine(), + getIndexer(), seqNo, opPrimaryTerm, version, @@ -1172,7 +1173,7 @@ public Engine.IndexResult applyIndexOperationOnReplica( } private Engine.IndexResult applyIndexOperation( - Engine engine, + Indexer engine, long seqNo, long opPrimaryTerm, long version, @@ -1203,7 +1204,7 @@ private Engine.IndexResult applyIndexOperation( UNASSIGNED_SEQ_NO, 0 ); - return getEngine().index(index); + return getIndexer().index(index); } assert opPrimaryTerm <= getOperationPrimaryTerm() : "op term [ " + opPrimaryTerm @@ -1283,7 +1284,7 @@ public static Engine.Index prepareIndex( ); } - private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOException { + private Engine.IndexResult index(Indexer engine, Engine.Index index) throws IOException { active.set(true); final Engine.IndexResult result; index = indexingOperationListeners.preIndex(shardId, index); @@ -1339,10 +1340,10 @@ private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOExc } public Engine.NoOpResult markSeqNoAsNoop(long seqNo, long opPrimaryTerm, String reason) throws IOException { - return markSeqNoAsNoop(getEngine(), seqNo, opPrimaryTerm, reason, Engine.Operation.Origin.REPLICA); + return markSeqNoAsNoop(getIndexer(), seqNo, opPrimaryTerm, reason, Engine.Operation.Origin.REPLICA); } - private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin) + private Engine.NoOpResult markSeqNoAsNoop(Indexer engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin) throws IOException { assert opPrimaryTerm <= getOperationPrimaryTerm() : "op term [ " + opPrimaryTerm @@ -1355,7 +1356,7 @@ private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrim return noOp(engine, noOp); } - private Engine.NoOpResult noOp(Engine engine, Engine.NoOp noOp) throws IOException { + private Engine.NoOpResult noOp(Indexer engine, Engine.NoOp noOp) throws IOException { active.set(true); if (logger.isTraceEnabled()) { logger.trace("noop (seq# [{}])", noOp.seqNo()); @@ -1380,7 +1381,7 @@ public Engine.DeleteResult applyDeleteOperationOnPrimary( ) throws IOException { assert versionType.validateVersionForWrites(version); return applyDeleteOperation( - getEngine(), + getIndexer(), UNASSIGNED_SEQ_NO, getOperationPrimaryTerm(), version, @@ -1406,10 +1407,10 @@ public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long opPrim UNASSIGNED_SEQ_NO, 0 ); - return getEngine().delete(delete); + return getIndexer().delete(delete); } return applyDeleteOperation( - getEngine(), + getIndexer(), seqNo, opPrimaryTerm, version, @@ -1422,7 +1423,7 @@ public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long opPrim } private Engine.DeleteResult 
applyDeleteOperation( - Engine engine, + Indexer engine, long seqNo, long opPrimaryTerm, long version, @@ -1463,7 +1464,7 @@ public static Engine.Delete prepareDelete( return new Engine.Delete(id, uid, seqNo, primaryTerm, version, versionType, origin, startTime, ifSeqNo, ifPrimaryTerm); } - private Engine.DeleteResult delete(Engine engine, Engine.Delete delete) throws IOException { + private Engine.DeleteResult delete(Indexer engine, Engine.Delete delete) throws IOException { active.set(true); final Engine.DeleteResult result; delete = indexingOperationListeners.preDelete(shardId, delete); @@ -1486,7 +1487,7 @@ public Engine.GetResult get(Engine.Get get) { if (mapper == null) { return GetResult.NOT_EXISTS; } - return getEngine().get(get, this::acquireSearcher); + return applyOnEngine(getIndexer(), engine -> engine.get(get, this::acquireSearcher)); } /** @@ -1497,14 +1498,14 @@ public void refresh(String source) { if (logger.isTraceEnabled()) { logger.trace("refresh with source [{}]", source); } - getEngine().refresh(source); + getIndexer().refresh(source); } /** * Returns how many bytes we are currently moving from heap to disk */ public long getWritingBytes() { - Engine engine = getEngineOrNull(); + Indexer engine = getIndexerOrNull(); if (engine == null) { return 0; } @@ -1530,7 +1531,7 @@ public FlushStats flushStats() { public DocsStats docStats() { readAllowed(); - return getEngine().docStats(); + return getIndexer().docStats(); } /** @@ -1538,7 +1539,7 @@ public DocsStats docStats() { * @throws AlreadyClosedException if shard is closed */ public CommitStats commitStats() { - return getEngine().commitStats(); + return getIndexer().commitStats(); } /** @@ -1546,11 +1547,11 @@ public CommitStats commitStats() { * @throws AlreadyClosedException if shard is closed */ public SeqNoStats seqNoStats() { - return getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); + return getIndexer().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); } public IndexingStats indexingStats() { - Engine engine = getEngineOrNull(); + Indexer engine = getIndexerOrNull(); final boolean throttled; final long throttleTimeInMillis; if (engine == null) { @@ -1583,7 +1584,7 @@ public StoreStats storeStats() { } public MergeStats mergeStats() { - final Engine engine = getEngineOrNull(); + final Indexer engine = getIndexerOrNull(); if (engine == null) { return new MergeStats(); } @@ -1593,7 +1594,7 @@ public MergeStats mergeStats() { } public SegmentsStats segmentStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments) { - SegmentsStats segmentsStats = getEngine().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); + SegmentsStats segmentsStats = getIndexer().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); segmentsStats.addBitsetMemoryInBytes(shardBitsetFilterCache.getMemorySizeInBytes()); // Populate remote_store stats only if the index is remote store backed if (indexSettings().isAssignedOnRemoteNode()) { @@ -1616,7 +1617,7 @@ public FieldDataStats fieldDataStats(String... fields) { } public TranslogStats translogStats() { - TranslogStats translogStats = getEngine().translogManager().getTranslogStats(); + TranslogStats translogStats = getIndexer().translogManager().getTranslogStats(); // Populate remote_store stats only if the index is remote store backed if (indexSettings.isAssignedOnRemoteNode()) { translogStats.addRemoteTranslogStats( @@ -1629,11 +1630,11 @@ public TranslogStats translogStats() { public CompletionStats completionStats(String... 
fields) { readAllowed(); - return getEngine().completionStats(fields); + return getIndexer().completionStats(fields); } public PollingIngestStats pollingIngestStats() { - return getEngine().pollingIngestStats(); + return getIndexer().pollingIngestStats(); } /** @@ -1652,7 +1653,7 @@ public void flush(FlushRequest request) { */ verifyNotClosed(); final long time = System.nanoTime(); - getEngine().flush(force, waitIfOngoing); + getIndexer().flush(force, waitIfOngoing); flushMetric.inc(System.nanoTime() - time); } @@ -1665,7 +1666,7 @@ public void trimTranslog() { return; } verifyNotClosed(); - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.translogManager().trimUnreferencedTranslogFiles(); } @@ -1673,7 +1674,7 @@ public void trimTranslog() { * Rolls the tranlog generation and cleans unneeded. */ public void rollTranslogGeneration() throws IOException { - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.translogManager().rollTranslogGeneration(); } @@ -1682,7 +1683,7 @@ public void forceMerge(ForceMergeRequest forceMerge) throws IOException { if (logger.isTraceEnabled()) { logger.trace("force merge with {}", forceMerge); } - Engine engine = getEngine(); + Indexer engine = getIndexer(); engine.forceMerge( forceMerge.flush(), forceMerge.maxNumSegments(), @@ -1703,7 +1704,7 @@ public org.apache.lucene.util.Version upgrade(UpgradeRequest upgrade) throws IOE } org.apache.lucene.util.Version previousVersion = minimumCompatibleVersion(); // we just want to upgrade the segments, not actually forge merge to a single segment - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.forceMerge( true, // we need to flush at the end to make sure the upgrade is durable Integer.MAX_VALUE, // we just want to upgrade the segments, not actually optimize to a single segment @@ -1722,7 +1723,7 @@ public org.apache.lucene.util.Version upgrade(UpgradeRequest upgrade) throws IOE public org.apache.lucene.util.Version minimumCompatibleVersion() { org.apache.lucene.util.Version luceneVersion = null; - for (Segment segment : getEngine().segments(false)) { + for (Segment segment : getIndexer().segments(false)) { if (luceneVersion == null || luceneVersion.onOrAfter(segment.getVersion())) { luceneVersion = segment.getVersion(); } @@ -1752,19 +1753,21 @@ public RemoteSegmentMetadata fetchLastRemoteUploadedSegmentMetadata() throws IOE * * @param flushFirst true if the index should first be flushed to disk / a low level lucene commit should be executed */ + @Deprecated public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { final IndexShardState state = this.state; // one time volatile read // we allow snapshot on closed index shard, since we want to do one after we close the shard and before we close the engine if (state == IndexShardState.STARTED || state == IndexShardState.CLOSED) { - return getEngine().acquireLastIndexCommit(flushFirst); + return applyOnEngine(getIndexer(), engine -> engine.acquireLastIndexCommit(flushFirst)); } else { throw new IllegalIndexShardStateException(shardId, state, "snapshot is not allowed"); } } + @Deprecated public GatedCloseable acquireLastIndexCommitAndRefresh(boolean flushFirst) throws EngineException { GatedCloseable indexCommit = acquireLastIndexCommit(flushFirst); - getEngine().refresh("Snapshot for Remote Store based Shard"); + getIndexer().refresh("Snapshot for Remote Store based Shard"); return indexCommit; } @@ -1794,7 +1797,7 @@ public void 
releaseLockOnCommitData(String snapshotId, long primaryTerm, long ge public Optional getReplicationEngine() { try { - if (getEngine() instanceof NRTReplicationEngine nrtEngine) { + if (getIndexer() instanceof NRTReplicationEngine nrtEngine) { return Optional.of(nrtEngine); } else { return Optional.empty(); @@ -1898,7 +1901,7 @@ public GatedCloseable acquireSafeIndexCommit() throws EngineExcepti final IndexShardState state = this.state; // one time volatile read // we allow snapshot on closed index shard, since we want to do one after we close the shard and before we close the engine if (state == IndexShardState.STARTED || state == IndexShardState.CLOSED) { - return getEngine().acquireSafeIndexCommit(); + return getIndexer().acquireSafeIndexCommit(); } else { throw new IllegalIndexShardStateException(shardId, state, "snapshot is not allowed"); } @@ -1973,7 +1976,7 @@ ReplicationCheckpoint computeReplicationCheckpoint(SegmentInfos segmentInfos) th segmentInfos.getGeneration(), segmentInfos.getVersion(), metadataMap.values().stream().mapToLong(StoreFileMetadata::length).sum(), - getEngine().config().getCodec().getName(), + getIndexer().config().getCodec().getName(), metadataMap ); logger.trace("Recomputed ReplicationCheckpoint for shard {}", checkpoint); @@ -2009,7 +2012,7 @@ public ReferencedSegmentsCheckpoint computeReferencedSegmentsCheckpoint() throws getOperationPrimaryTerm(), segmentInfosGatedCloseable.get().getVersion(), -1, - getEngine().config().getCodec().getName(), + getIndexer().config().getCodec().getName(), Collections.emptyMap(), segmentNames ); @@ -2040,7 +2043,7 @@ public MergedSegmentCheckpoint computeMergeSegmentCheckpoint(SegmentCommitInfo s getOperationPrimaryTerm(), segmentInfosGatedCloseable.get().getVersion(), segmentMetadataMap.values().stream().mapToLong(StoreFileMetadata::length).sum(), - getEngine().config().getCodec().getName(), + getIndexer().config().getCodec().getName(), segmentMetadataMap, segmentCommitInfo.info.name ); @@ -2079,7 +2082,7 @@ public boolean isSegmentReplicationAllowed() { logger.trace( () -> new ParameterizedMessage( "Shard does not have the correct engine type to perform segment replication {}.", - getEngine().getClass() + getIndexer().getClass() ) ); return false; @@ -2142,9 +2145,9 @@ public Store.MetadataSnapshot snapshotStoreMetadata() throws IOException { synchronized (engineMutex) { // if the engine is not running, we can access the store directly, but we need to make sure no one starts // the engine on us. If the engine is running, we can get a snapshot via the deletion policy of the engine. - final Engine engine = getEngineOrNull(); - if (engine != null) { - wrappedIndexCommit = engine.acquireLastIndexCommit(false); + final Indexer indexer = getIndexerOrNull(); + if (indexer != null) { + wrappedIndexCommit = applyOnEngine(indexer, engine -> engine.acquireLastIndexCommit(false)); } if (wrappedIndexCommit == null) { return store.getMetadata(null, true); @@ -2176,7 +2179,7 @@ public Map getSegmentMetadataMap() throws IOException */ public void failShard(String reason, @Nullable Exception e) { // fail the engine. This will cause this shard to also be removed from the node's index service. - getEngine().failEngine(reason, e); + getIndexer().failEngine(reason, e); } /** @@ -2188,12 +2191,13 @@ public Engine.SearcherSupplier acquireSearcherSupplier() { /** * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand. 
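For reference, the point-in-time pattern this supplier enables looks roughly as follows; an editorial sketch with an arbitrary source string:

    import org.opensearch.index.engine.Engine;
    import org.opensearch.index.shard.IndexShard;

    class PointInTimeSketch {
        static int docCount(IndexShard shard) {
            // The supplier pins one view of the index; every searcher it hands out sees that view.
            try (Engine.SearcherSupplier supplier = shard.acquireSearcherSupplier()) {
                try (Engine.Searcher searcher = supplier.acquireSearcher("example")) {
                    return searcher.getIndexReader().numDocs();
                }
            }
        }
    }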
+ * TODO: Mark this as deprecated once new searcher interface is integrated into the IndexShard */ public Engine.SearcherSupplier acquireSearcherSupplier(Engine.SearcherScope scope) { readAllowed(); markSearcherAccessed(); - final Engine engine = getEngine(); - return engine.acquireSearcherSupplier(this::wrapSearcher, scope); + final Indexer engine = getIndexer(); + return applyOnEngine(engine, eng -> eng.acquireSearcherSupplier(this::wrapSearcher, scope)); } public Engine.Searcher acquireSearcher(String source) { @@ -2204,13 +2208,19 @@ private void markSearcherAccessed() { lastSearcherAccess.lazySet(threadPool.relativeTimeInMillis()); } + /** + * TODO: Mark this as deprecated once new searcher interface is integrated into the IndexShard + */ private Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope) { readAllowed(); markSearcherAccessed(); - final Engine engine = getEngine(); - return engine.acquireSearcher(source, scope, this::wrapSearcher); + final Indexer indexer = getIndexer(); + return applyOnEngine(indexer, engine -> engine.acquireSearcher(source, scope, this::wrapSearcher)); } + /** + * TODO: Mark this as deprecated once new searcher interface is integrated into the IndexShard + */ private Engine.Searcher wrapSearcher(Engine.Searcher searcher) { assert OpenSearchDirectoryReader.unwrap(searcher.getDirectoryReader()) != null : "DirectoryReader must be an instance or OpenSearchDirectoryReader"; @@ -2229,6 +2239,9 @@ private Engine.Searcher wrapSearcher(Engine.Searcher searcher) { } } + /** + * TODO: Mark this as deprecated once new searcher interface is integrated into the IndexShard + */ public static Engine.Searcher wrapSearcher( Engine.Searcher engineSearcher, CheckedFunction readerWrapper @@ -2337,7 +2350,7 @@ public void close(String reason, boolean flushEngine, boolean deleted) throws IO changeState(IndexShardState.CLOSED, reason); } } finally { - final Engine engine = this.currentEngineReference.getAndSet(null); + final Indexer engine = this.currentEngineReference.getAndSet(null); try { if (engine != null && flushEngine) { engine.flushAndClose(); @@ -2473,7 +2486,7 @@ public void postRecovery(String reason) throws IndexShardStartedException, Index // we may not expose operations that were indexed with a refresh listener that was immediately // responded to in addRefreshListener. The refresh must happen under the same mutex used in addRefreshListener // and before moving this shard to POST_RECOVERY state (i.e., allow to read from this shard). 
- getEngine().refresh("post_recovery"); + getIndexer().refresh("post_recovery"); synchronized (mutex) { if (state == IndexShardState.CLOSED) { throw new IndexShardClosedException(shardId); @@ -2550,7 +2563,7 @@ private long recoverLocallyUpToGlobalCheckpoint() { final TranslogRecoveryRunner translogRecoveryRunner = (snapshot) -> { recoveryState.getTranslog().totalLocal(snapshot.totalOperations()); final int recoveredOps = runTranslogRecovery( - getEngine(), + getIndexer(), snapshot, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY, recoveryState.getTranslog()::incrementRecoveredOperations @@ -2559,9 +2572,9 @@ private long recoverLocallyUpToGlobalCheckpoint() { return recoveredOps; }; innerOpenEngineAndTranslog(() -> globalCheckpoint); - getEngine().translogManager() - .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), globalCheckpoint); - logger.trace("shard locally recovered up to {}", getEngine().getSeqNoStats(globalCheckpoint)); + getIndexer().translogManager() + .recoverFromTranslog(translogRecoveryRunner, getIndexer().getProcessedLocalCheckpoint(), globalCheckpoint); + logger.trace("shard locally recovered up to {}", getIndexer().getSeqNoStats(globalCheckpoint)); } finally { synchronized (engineMutex) { IOUtils.close(currentEngineReference.getAndSet(null)); @@ -2637,7 +2650,7 @@ private void validateLocalRecoveryState() { } public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { - getEngine().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo); + getIndexer().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo); } /** @@ -2647,7 +2660,7 @@ public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { * @see #updateMaxUnsafeAutoIdTimestamp(long) */ public long getMaxSeenAutoIdTimestamp() { - return getEngine().getMaxSeenAutoIdTimestamp(); + return getIndexer().getMaxSeenAutoIdTimestamp(); } /** @@ -2660,14 +2673,14 @@ public long getMaxSeenAutoIdTimestamp() { * a retry append-only (without timestamp) via recovery, then an original append-only (with timestamp) via replication. */ public void updateMaxUnsafeAutoIdTimestamp(long maxSeenAutoIdTimestampFromPrimary) { - getEngine().updateMaxUnsafeAutoIdTimestamp(maxSeenAutoIdTimestampFromPrimary); + getIndexer().updateMaxUnsafeAutoIdTimestamp(maxSeenAutoIdTimestampFromPrimary); } public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { - return applyTranslogOperation(getEngine(), operation, origin); + return applyTranslogOperation(getIndexer(), operation, origin); } - private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation operation, Engine.Operation.Origin origin) + private Engine.Result applyTranslogOperation(Indexer engine, Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { // If a translog op is replayed on the primary (eg. ccr), we need to use external instead of null for its version type. final VersionType versionType = (origin == Engine.Operation.Origin.PRIMARY) ? VersionType.EXTERNAL : null; @@ -2726,7 +2739,7 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o * Replays translog operations from the provided translog {@code snapshot} to the current engine using the given {@code origin}. * The callback {@code onOperationRecovered} is notified after each translog operation is replayed successfully. 
*/ - int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot, Engine.Operation.Origin origin, Runnable onOperationRecovered) + int runTranslogRecovery(Indexer engine, Translog.Snapshot snapshot, Engine.Operation.Origin origin, Runnable onOperationRecovered) throws IOException { int opsRecovered = 0; Translog.Operation operation; @@ -2786,7 +2799,7 @@ public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOEx translogRecoveryStats.totalOperations(snapshot.totalOperations()); translogRecoveryStats.totalOperationsOnStart(snapshot.totalOperations()); return runTranslogRecovery( - getEngine(), + getIndexer(), snapshot, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY, translogRecoveryStats::incrementRecoveredOperations @@ -2810,8 +2823,8 @@ public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOEx translogConfig.setDownloadRemoteTranslogOnInit(true); } - getEngine().translogManager() - .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), Long.MAX_VALUE); + getIndexer().translogManager() + .recoverFromTranslog(translogRecoveryRunner, getIndexer().getProcessedLocalCheckpoint(), Long.MAX_VALUE); } /** @@ -2838,7 +2851,7 @@ void openEngineAndSkipTranslogRecovery(boolean syncFromRemote) throws IOExceptio innerOpenEngineAndTranslog(replicationTracker, syncFromRemote); assert routingEntry().isSearchOnly() == false || translogStats().estimatedNumberOfOperations() == 0 : "Translog is expected to be empty but holds " + translogStats().estimatedNumberOfOperations() + "Operations."; - getEngine().translogManager().skipTranslogRecovery(); + getIndexer().translogManager().skipTranslogRecovery(); } private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) throws IOException { @@ -2902,7 +2915,8 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b } } // we must create a new engine under mutex (see IndexShard#snapshotStoreMetadata). - final Engine newEngine = engineFactory.newReadWriteEngine(config); + // TODO: For composite engine, this would be replaced by a separate factory. + final Indexer newEngine = new EngineBackedIndexer(engineFactory.newReadWriteEngine(config)); onNewEngine(newEngine); currentEngineReference.set(newEngine); @@ -2949,7 +2963,7 @@ private Map fetchUserData() throws IOException { return SegmentInfos.readLatestCommit(store.directory()).getUserData(); } - private void onNewEngine(Engine newEngine) { + private void onNewEngine(Indexer newEngine) { assert Thread.holdsLock(engineMutex); refreshListeners.setCurrentRefreshLocationSupplier(newEngine.translogManager()::getTranslogLastWriteLocation); @@ -3003,7 +3017,7 @@ public RecoveryState recoveryState() { */ public void finalizeRecovery() { recoveryState().setStage(RecoveryState.Stage.FINALIZE); - Engine engine = getEngine(); + Indexer engine = getIndexer(); engine.refresh("recovery_finalization"); engine.config().setEnableGcDeletes(true); } @@ -3109,7 +3123,7 @@ protected final void verifyActive() throws IllegalIndexShardStateException { * Returns number of heap bytes used by the indexing buffer for this shard, or 0 if the shard is closed */ public long getIndexBufferRAMBytesUsed() { - Engine engine = getEngineOrNull(); + Indexer engine = getIndexerOrNull(); if (engine == null) { return 0; } @@ -3129,7 +3143,7 @@ public void addShardFailureCallback(Consumer onShardFailure) { * indexing operation, so we can flush the index. 
*/ public void flushOnIdle(long inactiveTimeNS) { - Engine engineOrNull = getEngineOrNull(); + Indexer engineOrNull = getIndexerOrNull(); if (engineOrNull != null && System.nanoTime() - engineOrNull.getLastWriteNanos() >= inactiveTimeNS) { boolean wasActive = active.getAndSet(false); if (wasActive) { @@ -3258,7 +3272,7 @@ public void restoreFromRepository(Repository repository, ActionListener * @return {@code true} if the engine should be flushed */ public boolean shouldPeriodicallyFlush() { - final Engine engine = getEngineOrNull(); + final Indexer engine = getIndexerOrNull(); if (engine != null) { try { return engine.shouldPeriodicallyFlush(); @@ -3276,7 +3290,7 @@ public boolean shouldPeriodicallyFlush() { * @return {@code true} if the current generation should be rolled to a new generation */ boolean shouldRollTranslogGeneration() { - final Engine engine = getEngineOrNull(); + final Indexer engine = getIndexerOrNull(); if (engine != null) { try { return engine.translogManager().shouldRollTranslogGeneration(); @@ -3288,7 +3302,7 @@ boolean shouldRollTranslogGeneration() { } public void onSettingsChanged() { - Engine engineOrNull = getEngineOrNull(); + Indexer engineOrNull = getIndexerOrNull(); if (engineOrNull != null) { final boolean disableTranslogRetention = indexSettings.isSoftDeleteEnabled() && useRetentionLeasesInPeerRecovery; engineOrNull.onSettingsChanged( @@ -3326,7 +3340,7 @@ protected void doRun() { * Acquires a lock on the translog files and Lucene soft-deleted documents to prevent them from being trimmed */ public Closeable acquireHistoryRetentionLock() { - return getEngine().acquireHistoryRetentionLock(); + return getIndexer().acquireHistoryRetentionLock(); } /** @@ -3336,7 +3350,7 @@ public Closeable acquireHistoryRetentionLock() { */ public Translog.Snapshot getHistoryOperations(String reason, long startingSeqNo, long endSeqNo, boolean accurateCount) throws IOException { - return getEngine().newChangesSnapshot(reason, startingSeqNo, endSeqNo, true, accurateCount); + return getIndexer().newChangesSnapshot(reason, startingSeqNo, endSeqNo, true, accurateCount); } /** @@ -3347,7 +3361,7 @@ public Translog.Snapshot getHistoryOperations(String reason, long startingSeqNo, public Translog.Snapshot getHistoryOperationsFromTranslog(long startingSeqNo, long endSeqNo) throws IOException { assert indexSettings.isSegRepEnabledOrRemoteNode() == false : "unsupported operation for segment replication enabled indices or remote store backed indices"; - return getEngine().translogManager().newChangesSnapshot(startingSeqNo, endSeqNo, true); + return getIndexer().translogManager().newChangesSnapshot(startingSeqNo, endSeqNo, true); } /** @@ -3355,7 +3369,7 @@ public Translog.Snapshot getHistoryOperationsFromTranslog(long startingSeqNo, lo * This method should be called after acquiring the retention lock; See {@link #acquireHistoryRetentionLock()} */ public boolean hasCompleteHistoryOperations(String reason, long startingSeqNo) { - return getEngine().hasCompleteOperationHistory(reason, startingSeqNo); + return getIndexer().hasCompleteOperationHistory(reason, startingSeqNo); } /** @@ -3364,7 +3378,7 @@ public boolean hasCompleteHistoryOperations(String reason, long startingSeqNo) { * @return the minimum retained sequence number */ public long getMinRetainedSeqNo() { - return getEngine().getMinRetainedSeqNo(); + return getIndexer().getMinRetainedSeqNo(); } /** @@ -3375,7 +3389,7 @@ public long getMinRetainedSeqNo() { * @return number of history operations in the sequence number range */ public 
int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNo) throws IOException { - return getEngine().countNumberOfHistoryOperations(source, fromSeqNo, toSeqNo); + return getIndexer().countNumberOfHistoryOperations(source, fromSeqNo, toSeqNo); } /** @@ -3396,15 +3410,15 @@ public Translog.Snapshot newChangesSnapshot( boolean requiredFullRange, boolean accurateCount ) throws IOException { - return getEngine().newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount); + return getIndexer().newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount); } public List segments(boolean verbose) { - return getEngine().segments(verbose); + return getIndexer().segments(verbose); } public String getHistoryUUID() { - return getEngine().getHistoryUUID(); + return getIndexer().getHistoryUUID(); } public IndexEventListener getIndexEventListener() { @@ -3413,7 +3427,7 @@ public IndexEventListener getIndexEventListener() { public void activateThrottling() { try { - getEngine().activateThrottling(); + getIndexer().activateThrottling(); } catch (AlreadyClosedException ex) { // ignore } @@ -3421,7 +3435,7 @@ public void activateThrottling() { public void deactivateThrottling() { try { - getEngine().deactivateThrottling(); + getIndexer().deactivateThrottling(); } catch (AlreadyClosedException ex) { // ignore } @@ -3454,7 +3468,7 @@ private void handleRefreshException(Exception e) { */ public void writeIndexingBuffer() { try { - Engine engine = getEngine(); + Indexer engine = getIndexer(); engine.writeIndexingBuffer(); } catch (Exception e) { handleRefreshException(e); @@ -3738,7 +3752,7 @@ public void markAllocationIdAsInSync(final String allocationId, final long local * @return the local checkpoint */ public long getLocalCheckpoint() { - return getEngine().getPersistedLocalCheckpoint(); + return getIndexer().getPersistedLocalCheckpoint(); } /** @@ -3746,7 +3760,7 @@ public long getLocalCheckpoint() { * Also see {@link #getLocalCheckpoint()}. */ public long getProcessedLocalCheckpoint() { - return getEngine().getProcessedLocalCheckpoint(); + return getIndexer().getProcessedLocalCheckpoint(); } /** @@ -3762,7 +3776,7 @@ public long getLastKnownGlobalCheckpoint() { * Returns the latest global checkpoint value that has been persisted in the underlying storage (i.e. translog's checkpoint) */ public long getLastSyncedGlobalCheckpoint() { - return getEngine().getLastSyncedGlobalCheckpoint(); + return getIndexer().getLastSyncedGlobalCheckpoint(); } /** @@ -3788,7 +3802,7 @@ public void maybeSyncGlobalCheckpoint(final String reason) { } assert assertPrimaryMode(); // only sync if there are no operations in flight, or when using async durability - final SeqNoStats stats = getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); + final SeqNoStats stats = getIndexer().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); final boolean asyncDurability = indexSettings().getTranslogDurability() == Durability.ASYNC; if (stats.getMaxSeqNo() == stats.getGlobalCheckpoint() || asyncDurability) { final Map globalCheckpoints = getInSyncGlobalCheckpoints(); @@ -3908,7 +3922,7 @@ private void postActivatePrimaryMode() { // This helps to get a consistent state in remote store where both remote segment store and remote // translog contains data. 
try { - getEngine().translogManager().syncTranslog(); + getIndexer().translogManager().syncTranslog(); } catch (IOException e) { logger.error("Failed to sync translog to remote from new primary", e); } @@ -4017,8 +4031,8 @@ private void doCheckIndex() throws IOException { recoveryState.getVerifyIndex().checkIndexTime(Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - timeNS))); } - Engine getEngine() { - Engine engine = getEngineOrNull(); + Indexer getIndexer() { + Indexer engine = getIndexerOrNull(); if (engine == null) { throw new AlreadyClosedException("engine is closed"); } @@ -4029,23 +4043,14 @@ Engine getEngine() { * NOTE: returns null if engine is not yet started (e.g. recovery phase 1, copying over index files, is still running), or if engine is * closed. */ - protected Engine getEngineOrNull() { + protected Indexer getIndexerOrNull() { return this.currentEngineReference.get(); } // Only used for initializing segment replication CopyState public long getLastRefreshedCheckpoint() { - Engine engine = getEngine(); - if (false == engine instanceof InternalEngine) { - throw new IllegalStateException( - String.format( - Locale.ROOT, - "The type of Engine must be InternalEngine, but the current type is %s.", - engine.getClass().getSimpleName() - ) - ); - } - return ((InternalEngine) engine).lastRefreshedCheckpoint(); + Indexer engine = getIndexer(); + return engine.lastRefreshedCheckpoint(); } public void startRecovery( @@ -4229,7 +4234,7 @@ public boolean useRetentionLeasesInPeerRecovery() { } private SafeCommitInfo getSafeCommitInfo() { - final Engine engine = getEngineOrNull(); + final Indexer engine = getIndexerOrNull(); return engine == null ? SafeCommitInfo.EMPTY : engine.getSafeCommitInfo(); } @@ -4718,7 +4723,7 @@ private void innerAcquireReplicaOperationPermit( if (currentGlobalCheckpoint < maxSeqNo && indexSettings.isSegRepEnabledOrRemoteNode() == false) { resetEngineToGlobalCheckpoint(); } else { - getEngine().translogManager().rollTranslogGeneration(); + getIndexer().translogManager().rollTranslogGeneration(); } }, allowCombineOperationWithPrimaryTermUpdate ? operationListener : null); @@ -4766,7 +4771,7 @@ public List getActiveOperations() { private static AsyncIOProcessor createTranslogSyncProcessor( Logger logger, ThreadPool threadPool, - Supplier engineSupplier, + Supplier engineSupplier, boolean bufferAsyncIoProcessor, Supplier bufferIntervalSupplier ) { @@ -4822,14 +4827,14 @@ public final void sync(Translog.Location location, Consumer syncListe public void sync() throws IOException { verifyNotClosed(); - getEngine().translogManager().syncTranslog(); + getIndexer().translogManager().syncTranslog(); } /** * Checks if the underlying storage sync is required. */ public boolean isSyncNeeded() { - return getEngine().translogManager().isTranslogSyncNeeded(); + return getIndexer().translogManager().isTranslogSyncNeeded(); } /** @@ -4965,7 +4970,7 @@ ReplicationTracker getReplicationTracker() { public boolean scheduledRefresh() { verifyNotClosed(); boolean listenerNeedsRefresh = refreshListeners.refreshNeeded(); - if (isReadAllowed() && (listenerNeedsRefresh || getEngine().refreshNeeded())) { + if (isReadAllowed() && (listenerNeedsRefresh || getIndexer().refreshNeeded())) { if (listenerNeedsRefresh == false // if we have a listener that is waiting for a refresh we need to force it && isSearchIdleSupported() && isSearchIdle() @@ -4974,7 +4979,7 @@ && isSearchIdle() // lets skip this refresh since we are search idle and // don't necessarily need to refresh. 
the next searcher access will register a refreshListener and that will // cause the next schedule to refresh. - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.maybePruneDeletes(); // try to prune the deletes in the engine if we accumulated some setRefreshPending(engine); return false; @@ -4982,10 +4987,10 @@ && isSearchIdle() if (logger.isTraceEnabled()) { logger.trace("refresh with source [schedule]"); } - return getEngine().maybeRefresh("schedule"); + return getIndexer().maybeRefresh("schedule"); } } - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.maybePruneDeletes(); // try to prune the deletes in the engine if we accumulated some return false; } @@ -5031,8 +5036,8 @@ public final boolean hasRefreshPending() { return pendingRefreshLocation.get() != null; } - private void setRefreshPending(Engine engine) { - final Translog.Location lastWriteLocation = engine.translogManager().getTranslogLastWriteLocation(); + private void setRefreshPending(Indexer indexer) { + final Translog.Location lastWriteLocation = indexer.translogManager().getTranslogLastWriteLocation(); pendingRefreshLocation.updateAndGet(curr -> { if (curr == null || curr.compareTo(lastWriteLocation) <= 0) { return lastWriteLocation; @@ -5048,7 +5053,7 @@ private class RefreshPendingLocationListener implements ReferenceManager.Refresh @Override public void beforeRefresh() { try { - lastWriteLocation = getEngine().translogManager().getTranslogLastWriteLocation(); + lastWriteLocation = getIndexer().translogManager().getTranslogLastWriteLocation(); } catch (AlreadyClosedException exc) { // shard is closed - no location is fine lastWriteLocation = null; @@ -5222,7 +5227,7 @@ void resetEngineToGlobalCheckpoint() throws IOException { // flush to make sure the latest commit, which will be opened by the read-only engine, includes all operations. 
flush(new FlushRequest().waitIfOngoing(true)); - SetOnce<Engine> newEngineReference = new SetOnce<>(); + SetOnce<Indexer> newEngineReference = new SetOnce<>(); final long globalCheckpoint = getLastKnownGlobalCheckpoint(); assert globalCheckpoint == getLastSyncedGlobalCheckpoint(); synchronized (engineMutex) { @@ -5244,7 +5249,7 @@ public GatedCloseable<IndexCommit> acquireLastIndexCommit(boolean flushFirst) { throw new AlreadyClosedException("engine was closed"); } // ignore flushFirst since we flushed above and we do not want to interfere with ongoing translog replay - return newEngineReference.get().acquireLastIndexCommit(false); + return applyOnEngine(newEngineReference.get(), engine -> engine.acquireLastIndexCommit(false)); } } @@ -5254,7 +5259,7 @@ public GatedCloseable<IndexCommit> acquireSafeIndexCommit() { if (newEngineReference.get() == null) { throw new AlreadyClosedException("engine was closed"); } - return newEngineReference.get().acquireSafeIndexCommit(); + return applyOnEngine(newEngineReference.get(), Engine::acquireSafeIndexCommit); } } @@ -5264,7 +5269,7 @@ public GatedCloseable<SegmentInfos> getSegmentInfosSnapshot() { if (newEngineReference.get() == null) { throw new AlreadyClosedException("engine was closed"); } - return newEngineReference.get().getSegmentInfosSnapshot(); + return applyOnEngine(newEngineReference.get(), Engine::getSegmentInfosSnapshot); } } @@ -5272,7 +5277,7 @@ public void close() throws IOException { assert Thread.holdsLock(engineMutex); - Engine newEngine = newEngineReference.get(); + Indexer newEngine = newEngineReference.get(); if (newEngine == currentEngineReference.get()) { // we successfully installed the new engine so do not close it. newEngine = null; @@ -5280,19 +5285,19 @@ public void close() throws IOException { IOUtils.close(super::close, newEngine); } }; - IOUtils.close(currentEngineReference.getAndSet(readOnlyEngine)); + IOUtils.close(currentEngineReference.getAndSet(new EngineBackedIndexer(readOnlyEngine))); if (indexSettings.isRemoteStoreEnabled() || this.isRemoteSeeded()) { syncSegmentsFromRemoteSegmentStore(false); } if ((indexSettings.isRemoteTranslogStoreEnabled() || this.isRemoteSeeded()) && shardRouting.primary()) { syncRemoteTranslogAndUpdateGlobalCheckpoint(); } - newEngineReference.set(engineFactory.newReadWriteEngine(newEngineConfig(replicationTracker))); + newEngineReference.set(new EngineBackedIndexer(engineFactory.newReadWriteEngine(newEngineConfig(replicationTracker)))); onNewEngine(newEngineReference.get()); } final TranslogRecoveryRunner translogRunner = (snapshot) -> { long startTime = System.currentTimeMillis(); - Engine engine = newEngineReference.get(); + Indexer engine = newEngineReference.get(); assert null != engine; int translogRecoveryOperations; int totalOperations = snapshot.totalOperations(); @@ -5734,7 +5739,7 @@ boolean localDirectoryContains(Directory localDirectory, String file, long check * executing that replication request on a replica. 
*/ public long getMaxSeqNoOfUpdatesOrDeletes() { - return getEngine().getMaxSeqNoOfUpdatesOrDeletes(); + return getIndexer().getMaxSeqNoOfUpdatesOrDeletes(); } /** @@ -5754,7 +5759,7 @@ public long getMaxSeqNoOfUpdatesOrDeletes() { * @see RecoveryTarget#indexTranslogOperations(List, int, long, long, RetentionLeases, long, ActionListener) */ public void advanceMaxSeqNoOfUpdatesOrDeletes(long seqNo) { - getEngine().advanceMaxSeqNoOfUpdatesOrDeletes(seqNo); + getIndexer().advanceMaxSeqNoOfUpdatesOrDeletes(seqNo); } /** @@ -5763,7 +5768,7 @@ public void advanceMaxSeqNoOfUpdatesOrDeletes(long seqNo) { * @throws IllegalStateException if the sanity checks failed */ public void verifyShardBeforeIndexClosing() throws IllegalStateException { - getEngine().verifyEngineBeforeIndexClosing(); + getIndexer().verifyEngineBeforeIndexClosing(); } RetentionLeaseSyncer getRetentionLeaseSyncer() { @@ -5776,8 +5781,12 @@ RetentionLeaseSyncer getRetentionLeaseSyncer() { * * @throws EngineException - When segment infos cannot be safely retrieved */ + @Deprecated public GatedCloseable<SegmentInfos> getSegmentInfosSnapshot() { - return getEngine().getSegmentInfosSnapshot(); + if (getIndexer() instanceof EngineBackedIndexer indexer) { + return indexer.getEngine().getSegmentInfosSnapshot(); + } + throw new IllegalStateException("Cannot request SegmentInfos directly on IndexShard"); } private TimeValue getRemoteTranslogUploadBufferInterval(Supplier<TimeValue> clusterRemoteTranslogBufferIntervalSupplier) { @@ -5829,7 +5838,7 @@ public void updateShardIngestionState(IndexMetadata indexMetadata) { */ public void updateShardIngestionState(IngestionSettings ingestionSettings) { synchronized (engineMutex) { - if (!(getEngineOrNull() instanceof IngestionEngine ingestionEngine)) { + if (!(getIndexerOrNull() instanceof EngineBackedIndexer backedIndexer) || !(backedIndexer.getEngine() instanceof IngestionEngine ingestionEngine)) { return; } ingestionEngine.updateIngestionSettings(ingestionSettings); @@ -5841,7 +5850,7 @@ public void updateShardIngestionState(IngestionSettings ingestionSettings) { */ @Override public ShardIngestionState getIngestionState() { - Engine engine = getEngineOrNull(); - if (indexSettings.getIndexMetadata().useIngestionSource() == false || !(engine instanceof IngestionEngine ingestionEngine)) { + Indexer indexer = getIndexerOrNull(); + if (indexSettings.getIndexMetadata().useIngestionSource() == false || !(indexer instanceof EngineBackedIndexer backedIndexer) || !(backedIndexer.getEngine() instanceof IngestionEngine ingestionEngine)) { throw new OpenSearchException("Unable to retrieve ingestion state as the shard does not have ingestion enabled."); } @@ -5969,4 +5978,11 @@ public String toString() { } } + public static <T> T applyOnEngine(Indexer indexer, Function<Engine, T> applier) { + if (indexer instanceof EngineBackedIndexer backedIndexer) { + return applier.apply(backedIndexer.getEngine()); + } else { + throw new IllegalStateException("Cannot apply function on indexer " + indexer.getClass() + " directly on IndexShard"); + } + } }
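
Since applyOnEngine (just above) is the single sanctioned escape hatch for call sites that still need an Engine-only API, a short usage sketch may help orient readers. This is illustrative only, not code from the diff: getIndexer() is package-private in this PR, and GatedCloseable/IndexCommit are pre-existing OpenSearch/Lucene types.

```java
// Acquire the last index commit through the Indexer abstraction. The helper
// unwraps EngineBackedIndexer and throws IllegalStateException for any other
// Indexer implementation, so callers fail fast rather than operating on an
// unsupported engine. GatedCloseable implements Closeable, so
// try-with-resources releases the underlying commit lock (close() can throw
// IOException, which the surrounding method must declare or handle).
try (GatedCloseable<IndexCommit> wrappedCommit =
         IndexShard.applyOnEngine(shard.getIndexer(), engine -> engine.acquireLastIndexCommit(false))) {
    IndexCommit commit = wrappedCommit.get(); // valid only while the gate is open
    // ... read commit metadata, enumerate files, etc.
}
```
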
diff --git a/server/src/main/java/org/opensearch/index/shard/LocalShardSnapshot.java b/server/src/main/java/org/opensearch/index/shard/LocalShardSnapshot.java index a98fc2a3b4145..80e2b0b11f79c 100644 --- a/server/src/main/java/org/opensearch/index/shard/LocalShardSnapshot.java +++ b/server/src/main/java/org/opensearch/index/shard/LocalShardSnapshot.java @@ -82,11 +82,11 @@ Index getIndex() { } long maxSeqNo() { - return shard.getEngine().getSeqNoStats(-1).getMaxSeqNo(); + return shard.getIndexer().getSeqNoStats(-1).getMaxSeqNo(); } long maxUnsafeAutoIdTimestamp() { - return Long.parseLong(shard.getEngine().commitStats().getUserData().get(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID)); + return Long.parseLong(shard.getIndexer().commitStats().getUserData().get(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID)); } Directory getSnapshotDirectory() { diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 5109e48c80f63..cb61dfefe02ce 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -25,6 +25,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.UploadListener; import org.opensearch.core.action.ActionListener; +import org.opensearch.index.engine.EngineBackedIndexer; import org.opensearch.index.engine.EngineException; import org.opensearch.index.engine.InternalEngine; import org.opensearch.index.remote.RemoteSegmentTransferTracker; @@ -223,7 +224,7 @@ private boolean syncSegments() { // primaryMode to true. Due to this, the refresh that is triggered post replay of translog will not go through // if following condition does not exist. The segments created as part of translog replay will not be present // in the remote store. - return indexShard.state() != IndexShardState.STARTED || !(indexShard.getEngine() instanceof InternalEngine); + return indexShard.state() != IndexShardState.STARTED || !(indexShard.getIndexer() instanceof EngineBackedIndexer backedIndexer) || !(backedIndexer.getEngine() instanceof InternalEngine); } // Extract crypto metadata once at start of sync @@ -261,7 +262,8 @@ private boolean syncSegments() { } // Capture replication checkpoint before uploading the segments as upload can take some time and checkpoint can // move. - long lastRefreshedCheckpoint = ((InternalEngine) indexShard.getEngine()).lastRefreshedCheckpoint(); + long lastRefreshedCheckpoint = ((InternalEngine) ((EngineBackedIndexer) indexShard.getIndexer()).getEngine()) + .lastRefreshedCheckpoint(); Collection<String> localSegmentsPostRefresh = segmentInfos.files(true); // Create a map of file name to size and update the refresh segment tracker @@ -405,7 +407,7 @@ private void onSuccessfulSegmentsSync( // Reset the backoffDelayIterator for the future failures resetBackOffDelayIterator(); // Set the minimum sequence number for keeping translog - indexShard.getEngine().translogManager().setMinSeqNoToKeep(lastRefreshedCheckpoint + 1); + indexShard.getIndexer().translogManager().setMinSeqNoToKeep(lastRefreshedCheckpoint + 1); // Publishing the new checkpoint which is used for remote store + segrep indexes checkpointPublisher.publish(indexShard, checkpoint); logger.debug("onSuccessfulSegmentsSync lastRefreshedCheckpoint={} checkpoint={}", lastRefreshedCheckpoint, checkpoint); @@ -449,14 +451,15 @@ private boolean isRefreshAfterCommitSafe() { void uploadMetadata(Collection<String> localSegmentsPostRefresh, SegmentInfos segmentInfos, ReplicationCheckpoint replicationCheckpoint) throws IOException { - final long maxSeqNo = ((InternalEngine) indexShard.getEngine()).currentOngoingRefreshCheckpoint(); + final long maxSeqNo = ((InternalEngine) ((EngineBackedIndexer) indexShard.getIndexer()).getEngine()) + .currentOngoingRefreshCheckpoint(); SegmentInfos segmentInfosSnapshot = segmentInfos.clone(); Map<String, String> userData = segmentInfosSnapshot.getUserData(); userData.put(LOCAL_CHECKPOINT_KEY, String.valueOf(maxSeqNo)); userData.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(maxSeqNo)); segmentInfosSnapshot.setUserData(userData, false); - Translog.TranslogGeneration translogGeneration = 
indexShard.getEngine().translogManager().getTranslogGeneration(); + Translog.TranslogGeneration translogGeneration = indexShard.getIndexer().translogManager().getTranslogGeneration(); if (translogGeneration == null) { throw new UnsupportedOperationException("Encountered null TranslogGeneration while uploading metadata to remote segment store"); } else { @@ -573,8 +576,8 @@ private boolean isReadyForUpload() { if (indexShard.state() != null) { sb.append(" indexShardState=").append(indexShard.state()); } - if (indexShard.getEngineOrNull() != null) { - sb.append(" engineType=").append(indexShard.getEngine().getClass().getSimpleName()); + if (indexShard.getIndexerOrNull() != null) { + sb.append(" engineType=").append(indexShard.getIndexer().getClass().getSimpleName()); } if (indexShard.recoveryState() != null) { sb.append(" recoverySourceType=").append(indexShard.recoveryState().getRecoverySource().getType()); diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index 8fc64b38dc860..d032171eea6bc 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -203,7 +203,7 @@ void recoverFromLocalShards( internalRecoverFromStore(indexShard); // just trigger a merge to do housekeeping on the // copied segments - we will also see them in stats etc. - indexShard.getEngine().forceMerge(false, -1, false, false, false, UUIDs.randomBase64UUID()); + indexShard.getIndexer().forceMerge(false, -1, false, false, false, UUIDs.randomBase64UUID()); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); } @@ -446,7 +446,7 @@ void recoverFromSnapshotAndRemoteStore( } else { indexShard.openEngineAndRecoverFromTranslog(); } - indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.getIndexer().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); @@ -535,7 +535,7 @@ void recoverShallowSnapshotV2( writeEmptyRetentionLeasesFile(indexShard); indexShard.recoveryState().getIndex().setFileDetailsComplete(); indexShard.openEngineAndRecoverFromTranslog(false); - indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.getIndexer().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); @@ -543,7 +543,7 @@ void recoverShallowSnapshotV2( indexShard.postRecovery("post recovery from remote_store"); SegmentInfos committedSegmentInfos = indexShard.store().readLastCommittedSegmentsInfo(); try { - indexShard.getEngine() + indexShard.getIndexer() .translogManager() .setMinSeqNoToKeep(Long.parseLong(committedSegmentInfos.getUserData().get(SequenceNumbers.MAX_SEQ_NO)) + 1); } catch (IllegalArgumentException e) { @@ -681,7 +681,7 @@ private void recoverFromRemoteStore(IndexShard indexShard) throws IndexShardReco assert indexShard.shardRouting.primary() : "only primary shards can recover from store"; indexShard.recoveryState().getIndex().setFileDetailsComplete(); indexShard.openEngineAndRecoverFromTranslog(); - indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + 
indexShard.getIndexer().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); indexShard.postRecovery("post recovery from remote_store"); } catch (IOException | IndexShardRecoveryException e) { @@ -774,7 +774,7 @@ private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRe indexShard.waitForRemoteStoreSync(); logger.info("Remote Store is now seeded via local recovery for {}", indexShard.shardId()); } - indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.getIndexer().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); indexShard.postRecovery("post recovery from shard_store"); } catch (EngineException | IOException e) { @@ -842,7 +842,7 @@ private void completeRecovery(IndexShard indexShard, Store store) throws IOExcep bootstrapForSnapshot(indexShard, store); indexShard.openEngineAndSkipTranslogRecoveryFromSnapshot(); - indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.getIndexer().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); indexShard.postRecovery("Post recovery from shard_store"); } @@ -913,7 +913,7 @@ private void restore( } else { indexShard.openEngineAndRecoverFromTranslog(); } - indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.getIndexer().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); @@ -942,7 +942,7 @@ private void restore( } else { indexIdListener.onResponse(indexId); } - assert indexShard.getEngineOrNull() == null; + assert indexShard.getIndexerOrNull() == null; indexIdListener.whenComplete( idx -> repository.restoreShard( indexShard.store(), diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index b3e7950020dff..73d3e0668f554 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -43,6 +43,9 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry.Entry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.engine.dataformat.DataFormatPlugin; +import org.opensearch.index.engine.dataformat.DataFormatRegistry; +import org.opensearch.index.engine.dataformat.LuceneDataFormatPlugin; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.CompletionFieldMapper; @@ -109,13 +112,19 @@ public class IndicesModule extends AbstractModule { private final List<Entry> namedWritables = new ArrayList<>(); private final MapperRegistry mapperRegistry; + private final DataFormatRegistry dataFormatRegistry; public IndicesModule(List<MapperPlugin> mapperPlugins) { + this(mapperPlugins, Collections.emptyList()); + } + + public IndicesModule(List<MapperPlugin> mapperPlugins, List<DataFormatPlugin> dataFormatPlugins) { this.mapperRegistry = new MapperRegistry( getMappers(mapperPlugins), getMetadataMappers(mapperPlugins), getFieldFilter(mapperPlugins) ); + this.dataFormatRegistry = buildDataFormatRegistry(dataFormatPlugins); registerBuiltinWritables(); } @@ -314,4 +323,21 @@ protected void configure() { public MapperRegistry getMapperRegistry() { return mapperRegistry; } + + /** + * Builds the 
DataFormatRegistry with the built-in Lucene format and any plugin-provided formats. + */ + private static DataFormatRegistry buildDataFormatRegistry(List dataFormatPlugins) { + List allPlugins = new ArrayList<>(); + allPlugins.add(new LuceneDataFormatPlugin()); + allPlugins.addAll(dataFormatPlugins); + return new DataFormatRegistry(allPlugins); + } + + /** + * A registry for all data format plugins. + */ + public DataFormatRegistry getDataFormatRegistry() { + return dataFormatRegistry; + } } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 4293ca40af696..1b0ddee3eea77 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -123,6 +123,7 @@ import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.NoOpEngine; import org.opensearch.index.engine.ReadOnlyEngine; +import org.opensearch.index.engine.dataformat.DataFormatRegistry; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; @@ -385,6 +386,7 @@ public class IndicesService extends AbstractLifecycleComponent private final AtomicInteger numUncompletedDeletes = new AtomicInteger(); private final OldShardsStats oldShardsStats = new OldShardsStats(); private final MapperRegistry mapperRegistry; + private final DataFormatRegistry dataFormatRegistry; private final NamedWriteableRegistry namedWriteableRegistry; private final IndexingMemoryController indexingMemoryController; private final TimeValue cleanInterval; // clean interval for the field data cache @@ -438,6 +440,7 @@ public IndicesService( AnalysisRegistry analysisRegistry, IndexNameExpressionResolver indexNameExpressionResolver, MapperRegistry mapperRegistry, + DataFormatRegistry dataFormatRegistry, NamedWriteableRegistry namedWriteableRegistry, ThreadPool threadPool, IndexScopedSettings indexScopedSettings, @@ -484,6 +487,7 @@ public IndicesService( }), cacheService, threadPool, clusterService, nodeEnv); this.indicesQueryCache = new IndicesQueryCache(settings, clusterService.getClusterSettings()); this.mapperRegistry = mapperRegistry; + this.dataFormatRegistry = dataFormatRegistry; this.namedWriteableRegistry = namedWriteableRegistry; indexingMemoryController = new IndexingMemoryController( settings, @@ -616,6 +620,7 @@ public IndicesService( AnalysisRegistry analysisRegistry, IndexNameExpressionResolver indexNameExpressionResolver, MapperRegistry mapperRegistry, + DataFormatRegistry dataFormatRegistry, NamedWriteableRegistry namedWriteableRegistry, ThreadPool threadPool, IndexScopedSettings indexScopedSettings, @@ -646,6 +651,7 @@ public IndicesService( analysisRegistry, indexNameExpressionResolver, mapperRegistry, + dataFormatRegistry, namedWriteableRegistry, threadPool, indexScopedSettings, @@ -2207,6 +2213,13 @@ public boolean isMetadataField(String field) { return mapperRegistry.isMetadataField(field); } + /** + * Returns the data format registry containing all registered data format plugins. + */ + public DataFormatRegistry getDataFormatRegistry() { + return dataFormatRegistry; + } + /** * Returns true if fielddata is enabled for the {@link IdFieldMapper} field, false otherwise. 
*/ diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index b19c224d887a5..757de498a0b22 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -164,6 +164,7 @@ import org.opensearch.index.compositeindex.CompositeIndexSettings; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.dataformat.DataFormatPlugin; import org.opensearch.index.mapper.MappingTransformerRegistry; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.remote.RemoteIndexPathUploader; @@ -788,7 +789,8 @@ protected Node(final Environment initialEnvironment, Collection clas ); modules.add(clusterModule); final List mapperPlugins = pluginsService.filterPlugins(MapperPlugin.class); - IndicesModule indicesModule = new IndicesModule(mapperPlugins); + final List dataFormatPlugins = pluginsService.filterPlugins(DataFormatPlugin.class); + IndicesModule indicesModule = new IndicesModule(mapperPlugins, dataFormatPlugins); modules.add(indicesModule); SearchModule searchModule = new SearchModule(settings, pluginsService.filterPlugins(SearchPlugin.class)); @@ -981,6 +983,7 @@ protected Node(final Environment initialEnvironment, Collection clas analysisModule.getAnalysisRegistry(), clusterModule.getIndexNameExpressionResolver(), indicesModule.getMapperRegistry(), + indicesModule.getDataFormatRegistry(), namedWriteableRegistry, threadPool, settingsModule.getIndexScopedSettings(), diff --git a/server/src/test/java/org/opensearch/index/replication/IndexLevelReplicationTests.java b/server/src/test/java/org/opensearch/index/replication/IndexLevelReplicationTests.java index 4730360c0c782..80985dc43527d 100644 --- a/server/src/test/java/org/opensearch/index/replication/IndexLevelReplicationTests.java +++ b/server/src/test/java/org/opensearch/index/replication/IndexLevelReplicationTests.java @@ -62,7 +62,6 @@ import org.opensearch.index.seqno.SeqNoStats; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.IndexShard; -import org.opensearch.index.shard.IndexShardTests; import org.opensearch.index.store.Store; import org.opensearch.index.translog.SnapshotMatchers; import org.opensearch.index.translog.Translog; @@ -164,7 +163,7 @@ public void cleanFiles( future.get(); thread.join(); shards.assertAllEqual(numDocs); - Engine engine = IndexShardTests.getEngineFromShard(shards.getPrimary()); + Engine engine = getEngine(shards.getPrimary()); assertEquals(0, InternalEngineTests.getNumIndexVersionsLookups((InternalEngine) engine)); assertEquals(0, InternalEngineTests.getNumVersionLookups((InternalEngine) engine)); } @@ -733,7 +732,7 @@ public void testLateDeliveryAfterGCTriggeredOnReplica() throws Exception { final long deleteTimestamp = threadPool.relativeTimeInMillis(); replica.refresh("test"); assertBusy(() -> assertThat(threadPool.relativeTimeInMillis() - deleteTimestamp, greaterThan(gcInterval.millis()))); - getEngine(replica).maybePruneDeletes(); + getIndexer(replica).maybePruneDeletes(); indexOnReplica(indexRequest, shards, replica); // index arrives on replica lately. 
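
Stepping out of the test changes for a moment: the Node and IndicesModule hunks above establish a new extension point, DataFormatPlugin, discovered via pluginsService.filterPlugins(DataFormatPlugin.class) and registered next to the built-in LuceneDataFormatPlugin. The interface body is not part of this diff, so the skeleton below is hypothetical in everything except the type names and the registration path:

```java
// Hypothetical third-party plugin: the class name and its contents are
// invented for illustration; only DataFormatPlugin, LuceneDataFormatPlugin,
// DataFormatRegistry and the Node/IndicesModule wiring appear in this diff.
public class ColumnarDataFormatPlugin extends Plugin implements DataFormatPlugin {
    // A real implementation would contribute its data format(s) to the
    // DataFormatRegistry assembled in IndicesModule#buildDataFormatRegistry,
    // alongside the built-in Lucene format.
}
```
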
shards.assertAllEqual(0); } diff --git a/server/src/test/java/org/opensearch/index/shard/EngineAccess.java b/server/src/test/java/org/opensearch/index/shard/EngineAccess.java index 1b5c5aed96493..3174bc379120f 100644 --- a/server/src/test/java/org/opensearch/index/shard/EngineAccess.java +++ b/server/src/test/java/org/opensearch/index/shard/EngineAccess.java @@ -32,6 +32,7 @@ package org.opensearch.index.shard; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineBackedIndexer; /** * Test utility to access the engine of a shard @@ -39,6 +40,7 @@ public final class EngineAccess { public static Engine engine(IndexShard shard) { - return shard.getEngine(); + assert shard.getIndexer() instanceof EngineBackedIndexer; + return ((EngineBackedIndexer) shard.getIndexer()).getEngine(); } } diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardRetentionLeaseTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardRetentionLeaseTests.java index e6297c0f239e9..8e2f57023cac0 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardRetentionLeaseTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardRetentionLeaseTests.java @@ -188,7 +188,7 @@ private void runExpirationTest(final boolean primary) throws IOException { } { - final RetentionLeases retentionLeases = indexShard.getEngine().config().retentionLeasesSupplier().get(); + final RetentionLeases retentionLeases = indexShard.getIndexer().config().retentionLeasesSupplier().get(); assertThat(retentionLeases.version(), equalTo(initialVersion)); assertThat(retentionLeases.leases(), hasSize(2)); final RetentionLease retentionLease = retentionLeases.get("0"); @@ -214,7 +214,7 @@ private void runExpirationTest(final boolean primary) throws IOException { } { - final RetentionLeases retentionLeases = indexShard.getEngine().config().retentionLeasesSupplier().get(); + final RetentionLeases retentionLeases = indexShard.getIndexer().config().retentionLeasesSupplier().get(); assertThat(retentionLeases.version(), equalTo(initialVersion + 1)); assertThat(retentionLeases.leases(), hasSize(2)); final RetentionLease retentionLease = retentionLeases.get("0"); @@ -263,7 +263,7 @@ public void testPersistence() throws IOException { indexShard.persistRetentionLeases(); // the written retention leases should equal our current retention leases - final RetentionLeases retentionLeases = indexShard.getEngine().config().retentionLeasesSupplier().get(); + final RetentionLeases retentionLeases = indexShard.getIndexer().config().retentionLeasesSupplier().get(); final RetentionLeases writtenRetentionLeases = indexShard.loadRetentionLeases(); assertThat(writtenRetentionLeases.version(), equalTo(1L + length)); assertThat(writtenRetentionLeases.leases(), contains(retentionLeases.leases().toArray(new RetentionLease[0]))); @@ -275,7 +275,7 @@ public void testPersistence() throws IOException { ); try { recoverShardFromStore(recoveredShard); - final RetentionLeases recoveredRetentionLeases = recoveredShard.getEngine().config().retentionLeasesSupplier().get(); + final RetentionLeases recoveredRetentionLeases = recoveredShard.getIndexer().config().retentionLeasesSupplier().get(); assertThat(recoveredRetentionLeases.version(), equalTo(1L + length)); assertThat(recoveredRetentionLeases.leases(), contains(retentionLeases.leases().toArray(new RetentionLease[0]))); } finally { @@ -292,7 +292,7 @@ public void testPersistence() throws IOException { ); try { recoverShardFromStore(forceRecoveredShard); - 
final RetentionLeases recoveredRetentionLeases = forceRecoveredShard.getEngine().config().retentionLeasesSupplier().get(); + final RetentionLeases recoveredRetentionLeases = forceRecoveredShard.getIndexer().config().retentionLeasesSupplier().get(); assertThat(recoveredRetentionLeases.leases(), hasSize(1)); assertThat( recoveredRetentionLeases.leases().iterator().next().id(), diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index db05151b7261c..42aaeac822e91 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -102,6 +102,7 @@ import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.DocIdSeqNoAndSource; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineBackedIndexer; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineTestCase; @@ -110,6 +111,7 @@ import org.opensearch.index.engine.NRTReplicationEngine; import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.ReadOnlyEngine; +import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexFieldDataCache; @@ -241,8 +243,8 @@ public static void write(ShardStateMetadata shardStateMetadata, Path... shardPat ShardStateMetadata.FORMAT.writeAndCleanup(shardStateMetadata, shardPaths); } - public static Engine getEngineFromShard(IndexShard shard) { - return shard.getEngineOrNull(); + public static Indexer getIndexerFromShard(IndexShard shard) { + return shard.getIndexerOrNull(); } public void testWriteShardState() throws Exception { @@ -1318,7 +1320,7 @@ public void testGlobalCheckpointSync() throws IOException { recoverReplica(replicaShard, primaryShard, true); final int maxSeqNo = randomIntBetween(0, 128); for (int i = 0; i <= maxSeqNo; i++) { - EngineTestCase.generateNewSeqNo(primaryShard.getEngine()); + EngineTestCase.generateNewSeqNo(getEngine(primaryShard)); } final long checkpoint = rarely() ? 
maxSeqNo - scaledRandomIntBetween(0, maxSeqNo) : maxSeqNo; @@ -1481,7 +1483,7 @@ public void testRollbackReplicaEngineOnPromotion() throws IOException, Interrupt final CountDownLatch latch = new CountDownLatch(1); final boolean shouldRollback = Math.max(globalCheckpoint, globalCheckpointOnReplica) < indexShard.seqNoStats().getMaxSeqNo() && indexShard.seqNoStats().getMaxSeqNo() != SequenceNumbers.NO_OPS_PERFORMED; - final Engine beforeRollbackEngine = indexShard.getEngine(); + final Indexer beforeRollbackIndexer = indexShard.getIndexer(); final long newMaxSeqNoOfUpdates = randomLongBetween(indexShard.getMaxSeqNoOfUpdatesOrDeletes(), Long.MAX_VALUE); randomReplicaOperationPermitAcquisition( indexShard, @@ -1511,9 +1513,9 @@ public void onFailure(final Exception e) { } assertThat(getShardDocUIDs(indexShard), equalTo(docsBelowGlobalCheckpoint)); if (shouldRollback) { - assertThat(indexShard.getEngine(), not(sameInstance(beforeRollbackEngine))); + assertThat(indexShard.getIndexer(), not(sameInstance(beforeRollbackIndexer))); } else { - assertThat(indexShard.getEngine(), sameInstance(beforeRollbackEngine)); + assertThat(indexShard.getIndexer(), sameInstance(beforeRollbackIndexer)); } assertThat(indexShard.getMaxSeqNoOfUpdatesOrDeletes(), equalTo(newMaxSeqNoOfUpdates)); // ensure that after the local checkpoint throw back and indexing again, the local checkpoint advances @@ -1683,7 +1685,7 @@ public void testMinimumCompatVersion() throws IOException { assertEquals(versionCreated.luceneVersion, test.minimumCompatibleVersion()); indexDoc(test, "_doc", "test"); assertEquals(versionCreated.luceneVersion, test.minimumCompatibleVersion()); - test.getEngine().flush(); + test.getIndexer().flush(); assertEquals(Version.CURRENT.luceneVersion, test.minimumCompatibleVersion()); closeShards(test); @@ -2339,7 +2341,7 @@ public void testRecoverFromStoreWithOutOfOrderDelete() throws IOException { long primaryTerm = shard.getOperationPrimaryTerm(); shard.advanceMaxSeqNoOfUpdatesOrDeletes(1); // manually advance msu for this delete shard.applyDeleteOperationOnReplica(1, primaryTerm, 2, "id"); - shard.getEngine().translogManager().rollTranslogGeneration(); // isolate the delete in it's own generation + shard.getIndexer().translogManager().rollTranslogGeneration(); // isolate the delete in it's own generation shard.applyIndexOperationOnReplica( UUID.randomUUID().toString(), 0, @@ -2391,7 +2393,7 @@ public void testRecoverFromStoreWithOutOfOrderDelete() throws IOException { replayedOps = 3; } else { if (randomBoolean()) { - shard.getEngine().translogManager().rollTranslogGeneration(); + shard.getIndexer().translogManager().rollTranslogGeneration(); } translogOps = 5; replayedOps = 5; @@ -2667,7 +2669,7 @@ public void testRecoverFromStoreRemoveStaleOperations() throws Exception { ); flushShard(shard); assertThat(getShardDocUIDs(shard), containsInAnyOrder("doc-0", "doc-1")); - shard.getEngine().translogManager().rollTranslogGeneration(); + shard.getIndexer().translogManager().rollTranslogGeneration(); shard.markSeqNoAsNoop(1, primaryTerm, "test"); shard.applyIndexOperationOnReplica( UUID.randomUUID().toString(), @@ -2769,7 +2771,7 @@ public void testRestoreShard() throws IOException { IndexShard target = newStartedShard(true); indexDoc(source, "_doc", "0"); - EngineTestCase.generateNewSeqNo(source.getEngine()); // create a gap in the history + EngineTestCase.generateNewSeqNo(getEngine(source)); // create a gap in the history indexDoc(source, "_doc", "2"); if (randomBoolean()) { source.refresh("test"); @@ -2961,7 
+2963,7 @@ public void testRestoreShardFromRemoteStore(boolean performFlush) throws IOExcep // Make sure to drain refreshes from the shard. Otherwise, if the refresh is in-progress, it overlaps with // deletion of segment files in the subsequent code block. - for (ReferenceManager.RefreshListener refreshListener : target.getEngine().config().getInternalRefreshListener()) { + for (ReferenceManager.RefreshListener refreshListener : target.getIndexer().config().getInternalRefreshListener()) { if (refreshListener instanceof ReleasableRetryableRefreshListener) { ((ReleasableRetryableRefreshListener) refreshListener).drainRefreshes(); } @@ -3573,7 +3575,7 @@ public void testRecoverFromTranslog() throws IOException { primary.recoveryState().getTranslog().totalOperationsOnStart(snapshot.totalOperations()); primary.state = IndexShardState.RECOVERING; // translog recovery on the next line would otherwise fail as we are in POST_RECOVERY primary.runTranslogRecovery( - primary.getEngine(), + primary.getIndexer(), snapshot, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY, primary.recoveryState().getTranslog()::incrementRecoveredOperations @@ -4035,7 +4037,7 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept assertTrue("at least 2 files, commit and data: " + storeFileMetadatas.toString(), storeFileMetadatas.size() > 1); AtomicBoolean stop = new AtomicBoolean(false); CountDownLatch latch = new CountDownLatch(1); - expectThrows(AlreadyClosedException.class, () -> newShard.getEngine()); // no engine + expectThrows(AlreadyClosedException.class, () -> newShard.getIndexer()); // no engine Thread thread = new Thread(() -> { latch.countDown(); while (stop.get() == false) { @@ -4067,7 +4069,7 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept public void testCheckpointRefreshListener() throws IOException { final SegmentReplicationCheckpointPublisher mock = mock(SegmentReplicationCheckpointPublisher.class); IndexShard shard = newStartedShard(p -> newShard(true, mock), true); - List refreshListeners = shard.getEngine().config().getInternalRefreshListener(); + List refreshListeners = shard.getIndexer().config().getInternalRefreshListener(); assertTrue(refreshListeners.stream().anyMatch(e -> e instanceof CheckpointRefreshListener)); closeShards(shard); } @@ -4078,7 +4080,7 @@ public void testCheckpointRefreshListener() throws IOException { public void testCheckpointRefreshListenerWithNull() throws IOException { final SegmentReplicationCheckpointPublisher publisher = null; IndexShard shard = newStartedShard(p -> newShard(true, publisher), true); - List refreshListeners = shard.getEngine().config().getInternalRefreshListener(); + List refreshListeners = shard.getIndexer().config().getInternalRefreshListener(); assertFalse(refreshListeners.stream().anyMatch(e -> e instanceof CheckpointRefreshListener)); closeShards(shard); } @@ -4412,7 +4414,7 @@ public void testIsSearchIdle() throws Exception { IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, null); recoverShardFromStore(primary); indexDoc(primary, "_doc", "0", "{\"foo\" : \"bar\"}"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); assertTrue(primary.scheduledRefresh()); assertFalse(primary.isSearchIdle()); @@ -4462,15 +4464,15 @@ public void testScheduledRefresh() throws Exception { IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, null); recoverShardFromStore(primary); 
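
An aside on the test diffs in this file: several call sites now spell out the double cast ((InternalEngine) ((EngineBackedIndexer) shard.getIndexer()).getEngine()), for example in testRecordsForceMerges further down. A small helper could centralize that unwrap; this is a suggestion, not something the PR adds:

```java
// Hypothetical test utility (not in this PR): unwrap a concrete Engine
// subtype from the shard's Indexer, mirroring the EngineAccess change and
// IndexShard.applyOnEngine from this diff, and fail loudly otherwise.
static <T extends Engine> T unwrapEngine(IndexShard shard, Class<T> engineType) {
    Indexer indexer = shard.getIndexer(); // package-private accessor introduced by this PR
    if (indexer instanceof EngineBackedIndexer backedIndexer && engineType.isInstance(backedIndexer.getEngine())) {
        return engineType.cast(backedIndexer.getEngine());
    }
    throw new AssertionError("expected " + engineType.getSimpleName() + " but found " + indexer.getClass().getSimpleName());
}
```

With that, the force-merge assertions would read unwrapEngine(shard, InternalEngine.class).getForceMergeUUID().
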
indexDoc(primary, "_doc", "0", "{\"foo\" : \"bar\"}"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); assertTrue(primary.scheduledRefresh()); IndexScopedSettings scopedSettings = primary.indexSettings().getScopedSettings(); settings = Settings.builder().put(settings).put(IndexSettings.INDEX_SEARCH_IDLE_AFTER.getKey(), TimeValue.ZERO).build(); scopedSettings.applySettings(settings); - assertFalse(primary.getEngine().refreshNeeded()); + assertFalse(primary.getIndexer().refreshNeeded()); indexDoc(primary, "_doc", "1", "{\"foo\" : \"bar\"}"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); long lastSearchAccess = primary.getLastSearcherAccess(); assertFalse(primary.scheduledRefresh()); assertEquals(lastSearchAccess, primary.getLastSearcherAccess()); @@ -4496,7 +4498,7 @@ public void testScheduledRefresh() throws Exception { try (Engine.Searcher searcher = primary.acquireSearcher("test")) { assertEquals(1, searcher.getIndexReader().numDocs()); } - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); assertTrue(primary.scheduledRefresh()); latch.await(); CountDownLatch latch1 = new CountDownLatch(1); @@ -4536,13 +4538,13 @@ public void testRefreshIsNeededWithRefreshListeners() throws IOException, Interr IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, null); recoverShardFromStore(primary); indexDoc(primary, "_doc", "0", "{\"foo\" : \"bar\"}"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); assertTrue(primary.scheduledRefresh()); Engine.IndexResult doc = indexDoc(primary, "_doc", "1", "{\"foo\" : \"bar\"}"); CountDownLatch latch = new CountDownLatch(1); primary.addRefreshListener(doc.getTranslogLocation(), r -> latch.countDown()); assertEquals(1, latch.getCount()); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); assertTrue(primary.scheduledRefresh()); latch.await(); @@ -4554,7 +4556,7 @@ public void testRefreshIsNeededWithRefreshListeners() throws IOException, Interr CountDownLatch latch1 = new CountDownLatch(1); primary.addRefreshListener(doc.getTranslogLocation(), r -> latch1.countDown()); assertEquals(1, latch1.getCount()); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); assertTrue(primary.scheduledRefresh()); latch1.await(); closeShards(primary); @@ -4595,7 +4597,7 @@ public void testOnCloseStats() throws IOException { public void testSupplyTombstoneDoc() throws Exception { IndexShard shard = newStartedShard(); String id = randomRealisticUnicodeOfLengthBetween(1, 10); - ParsedDocument deleteTombstone = shard.getEngine().config().getTombstoneDocSupplier().newDeleteTombstoneDoc(id); + ParsedDocument deleteTombstone = shard.getIndexer().config().getTombstoneDocSupplier().newDeleteTombstoneDoc(id); assertThat(deleteTombstone.docs(), hasSize(1)); ParseContext.Document deleteDoc = deleteTombstone.docs().get(0); assertThat( @@ -4613,7 +4615,7 @@ public void testSupplyTombstoneDoc() throws Exception { assertThat(deleteDoc.getField(SeqNoFieldMapper.TOMBSTONE_NAME).numericValue().longValue(), equalTo(1L)); final String reason = randomUnicodeOfLength(200); - ParsedDocument noopTombstone = shard.getEngine().config().getTombstoneDocSupplier().newNoopTombstoneDoc(reason); + ParsedDocument noopTombstone = 
shard.getIndexer().config().getTombstoneDocSupplier().newNoopTombstoneDoc(reason); assertThat(noopTombstone.docs(), hasSize(1)); ParseContext.Document noopDoc = noopTombstone.docs().get(0); assertThat( @@ -4862,7 +4864,7 @@ public void testResetEngineWithBrokenTranslog() throws Exception { Translog.Snapshot snapshot = TestTranslog.newSnapshotFromOperations(operations); final MapperParsingException error = expectThrows( MapperParsingException.class, - () -> shard.runTranslogRecovery(shard.getEngine(), snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {}) + () -> shard.runTranslogRecovery(shard.getIndexer(), snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {}) ); assertThat(error.getMessage(), containsString("failed to parse field [foo] of type [text]")); } finally { @@ -5049,16 +5051,16 @@ public void testReadOnlyReplicaEngineConfig() throws IOException { .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .build(); final IndexShard primaryShard = newStartedShard(false, primarySettings, new NRTReplicationEngineFactory()); - assertFalse(primaryShard.getEngine().config().isReadOnlyReplica()); - assertEquals(primaryShard.getEngine().getClass(), InternalEngine.class); + assertFalse(primaryShard.getIndexer().config().isReadOnlyReplica()); + assertEquals(getEngine(primaryShard).getClass(), InternalEngine.class); Settings replicaSettings = Settings.builder() .put(primarySettings) .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) .build(); final IndexShard replicaShard = newStartedShard(false, replicaSettings, new NRTReplicationEngineFactory()); - assertTrue(replicaShard.getEngine().config().isReadOnlyReplica()); - assertEquals(replicaShard.getEngine().getClass(), NRTReplicationEngine.class); + assertTrue(replicaShard.getIndexer().config().isReadOnlyReplica()); + assertEquals(getEngine(replicaShard).getClass(), NRTReplicationEngine.class); closeShards(primaryShard, replicaShard); } @@ -5070,12 +5072,12 @@ public void testTranslogFactoryWithoutRemoteStore() throws IOException { .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .build(); final IndexShard primaryShard = newStartedShard(true, primarySettings, new NRTReplicationEngineFactory()); - assertEquals(primaryShard.getEngine().getClass(), InternalEngine.class); - assertEquals(primaryShard.getEngine().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); + assertEquals(getEngine(primaryShard).getClass(), InternalEngine.class); + assertEquals(primaryShard.getIndexer().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); final IndexShard replicaShard = newStartedShard(true, primarySettings, new NRTReplicationEngineFactory()); - assertEquals(replicaShard.getEngine().getClass(), InternalEngine.class); - assertEquals(replicaShard.getEngine().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); + assertEquals(getEngine(replicaShard).getClass(), InternalEngine.class); + assertEquals(replicaShard.getIndexer().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); closeShards(primaryShard, replicaShard); } @@ -5087,8 +5089,8 @@ public void testTranslogFactoryForReplicaShardWithoutRemoteStore() throws IOExce .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .build(); final IndexShard primaryShard = newStartedShard(false, primarySettings, new NRTReplicationEngineFactory()); - assertEquals(primaryShard.getEngine().getClass(), InternalEngine.class); - assertEquals(primaryShard.getEngine().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); +
assertEquals(getEngine(primaryShard).getClass(), InternalEngine.class); + assertEquals(primaryShard.getIndexer().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); closeShards(primaryShard); } @@ -5103,8 +5105,8 @@ public void testTranslogFactoryForRemoteTranslogBackedPrimaryShard() throws IOEx .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, "txlog-test") .build(); final IndexShard primaryShard = newStartedShard(true, primarySettings, new NRTReplicationEngineFactory()); - assertEquals(primaryShard.getEngine().getClass(), InternalEngine.class); - assertEquals(primaryShard.getEngine().config().getTranslogFactory().getClass(), RemoteBlobStoreInternalTranslogFactory.class); + assertEquals(getEngine(primaryShard).getClass(), InternalEngine.class); + assertEquals(primaryShard.getIndexer().config().getTranslogFactory().getClass(), RemoteBlobStoreInternalTranslogFactory.class); closeShards(primaryShard); } @@ -5119,8 +5121,8 @@ public void testTranslogFactoryForRemoteTranslogBackedReplicaShard() throws IOEx .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, "txlog-test") .build(); final IndexShard replicaShard = newStartedShard(false, primarySettings, new NRTReplicationEngineFactory()); - assertEquals(replicaShard.getEngine().getClass(), NRTReplicationEngine.class); - assertEquals(replicaShard.getEngine().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); + assertEquals(getEngine(replicaShard).getClass(), NRTReplicationEngine.class); + assertEquals(replicaShard.getIndexer().config().getTranslogFactory().getClass(), InternalTranslogFactory.class); closeShards(replicaShard); } @@ -5177,16 +5179,16 @@ public void testCloseShardWhileEngineIsWarming() throws Exception { public void testRecordsForceMerges() throws IOException { IndexShard shard = newStartedShard(true); - final String initialForceMergeUUID = ((InternalEngine) shard.getEngine()).getForceMergeUUID(); + final String initialForceMergeUUID = ((InternalEngine) getEngine(shard)).getForceMergeUUID(); assertThat(initialForceMergeUUID, nullValue()); final ForceMergeRequest firstForceMergeRequest = new ForceMergeRequest().maxNumSegments(1); shard.forceMerge(firstForceMergeRequest); - final String secondForceMergeUUID = ((InternalEngine) shard.getEngine()).getForceMergeUUID(); + final String secondForceMergeUUID = ((InternalEngine) getEngine(shard)).getForceMergeUUID(); assertThat(secondForceMergeUUID, notNullValue()); assertThat(secondForceMergeUUID, equalTo(firstForceMergeRequest.forceMergeUUID())); final ForceMergeRequest secondForceMergeRequest = new ForceMergeRequest().maxNumSegments(1); shard.forceMerge(secondForceMergeRequest); - final String thirdForceMergeUUID = ((InternalEngine) shard.getEngine()).getForceMergeUUID(); + final String thirdForceMergeUUID = ((InternalEngine) getEngine(shard)).getForceMergeUUID(); assertThat(thirdForceMergeUUID, notNullValue()); assertThat(thirdForceMergeUUID, not(equalTo(secondForceMergeUUID))); assertThat(thirdForceMergeUUID, equalTo(secondForceMergeRequest.forceMergeUUID())); diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index d2a07546fe68a..2489477ce9cd3 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++
b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -156,7 +156,7 @@ public void testNRTReplicaWithRemoteStorePromotedAsPrimary(boolean performFlushF oldPrimary.close("demoted", false, false); oldPrimary.store().close(); - assertEquals(InternalEngine.class, nextPrimary.getEngine().getClass()); + assertEquals(InternalEngine.class, getEngine(nextPrimary).getClass()); assertDocCounts(nextPrimary, totalDocs, totalDocs); // refresh and push segments to our other replica. @@ -193,7 +193,7 @@ public void testNoDuplicateSeqNo() throws Exception { CountDownLatch latch = new CountDownLatch(1); shards.promoteReplicaToPrimary(replicaShard, (shard, listener) -> { try { - assertAtMostOneLuceneDocumentPerSequenceNumber(replicaShard.getEngine()); + assertAtMostOneLuceneDocumentPerSequenceNumber(getEngine(replicaShard)); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index a1d504f57778e..4def4e17918bd 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -118,7 +118,7 @@ public void tearDown() throws Exception { Directory storeDirectory = ((FilterDirectory) ((FilterDirectory) indexShard.store().directory()).getDelegate()).getDelegate(); ((BaseDirectoryWrapper) storeDirectory).setCheckIndexOnClose(false); - for (ReferenceManager.RefreshListener refreshListener : indexShard.getEngine().config().getInternalRefreshListener()) { + for (ReferenceManager.RefreshListener refreshListener : indexShard.getIndexer().config().getInternalRefreshListener()) { if (refreshListener instanceof ReleasableRetryableRefreshListener) { ((ReleasableRetryableRefreshListener) refreshListener).drainRefreshes(); } @@ -796,8 +796,8 @@ private Tuple mockIn successLatch.countDown(); logger.info("Value fo latch {}", successLatch.getCount()); } - return indexShard.getEngine(); - }).when(shard).getEngine(); + return indexShard.getIndexer(); + }).when(shard).getIndexer(); SegmentReplicationCheckpointPublisher emptyCheckpointPublisher = spy(SegmentReplicationCheckpointPublisher.EMPTY); AtomicLong checkpointPublisherCounter = new AtomicLong(); diff --git a/server/src/test/java/org/opensearch/index/shard/ReplicaRecoveryWithRemoteTranslogOnPrimaryTests.java b/server/src/test/java/org/opensearch/index/shard/ReplicaRecoveryWithRemoteTranslogOnPrimaryTests.java index 85864eebd6d0d..5debfbfcfc2bc 100644 --- a/server/src/test/java/org/opensearch/index/shard/ReplicaRecoveryWithRemoteTranslogOnPrimaryTests.java +++ b/server/src/test/java/org/opensearch/index/shard/ReplicaRecoveryWithRemoteTranslogOnPrimaryTests.java @@ -130,11 +130,11 @@ public void testNoTranslogHistoryTransferred() throws Exception { shards.startAll(); assertEquals(docIdAndSeqNosAfterFlush, getDocIdAndSeqNos(replica)); assertDocCount(replica, numDocs); - assertEquals(NRTReplicationEngine.class, replica.getEngine().getClass()); + assertEquals(NRTReplicationEngine.class, getEngine(replica).getClass()); // Step 3 - Check replica's translog has no operations - assertEquals(WriteOnlyTranslogManager.class, replica.getEngine().translogManager().getClass()); + assertEquals(WriteOnlyTranslogManager.class,
replica.getIndexer().translogManager().getClass()); + WriteOnlyTranslogManager replicaTranslogManager = (WriteOnlyTranslogManager) replica.getIndexer().translogManager(); assertEquals(0, replicaTranslogManager.getTranslog().totalOperations()); // Adding this for close to succeed diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index 8666e222c9dec..d517721db07d7 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -492,7 +492,7 @@ public void testSegmentReplication_With_ReaderClosedConcurrently() throws Except replicateSegments(primaryShard, shards.getReplicas()); IndexShard spyShard = spy(replicaShard); - Engine.Searcher test = replicaShard.getEngine().acquireSearcher("testSegmentReplication_With_ReaderClosedConcurrently"); + Engine.Searcher test = getEngine(replicaShard).acquireSearcher("testSegmentReplication_With_ReaderClosedConcurrently"); shards.assertAllEqual(numDocs); // Step 2. Ingest numDocs documents again & replicate to replica shard @@ -554,7 +554,7 @@ public void testSegmentReplication_With_EngineClosedConcurrently() throws Except // cleans up recently copied over files IndexShard spyShard = spy(replicaShard); doAnswer(n -> { - NRTReplicationEngine engine = (NRTReplicationEngine) replicaShard.getEngine(); + NRTReplicationEngine engine = (NRTReplicationEngine) getEngine(replicaShard); // Using engine.close() prevents indexShard.finalizeReplication execution due to engine AlreadyClosedException, // thus as workaround, use updateSegments which eventually calls commitSegmentInfos on latest segment infos.
engine.updateSegments(engine.getSegmentInfosSnapshot().get()); @@ -975,7 +975,7 @@ public void testNoDuplicateSeqNo() throws Exception { CountDownLatch latch = new CountDownLatch(1); shards.promoteReplicaToPrimary(replicaShard, (shard, listener) -> { try { - assertAtMostOneLuceneDocumentPerSequenceNumber(replicaShard.getEngine()); + assertAtMostOneLuceneDocumentPerSequenceNumber(getEngine(replicaShard)); } catch (IOException e) { throw new RuntimeException(e); } @@ -1004,7 +1004,7 @@ public void testQueryDuringEngineResetShowsDocs() throws Exception { final AtomicReference failed = new AtomicReference<>(); doAnswer(ans -> { try { - final Engine engineOrNull = replicaShard.getEngineOrNull(); + final Engine engineOrNull = getEngine(replicaShard); assertNotNull(engineOrNull); assertTrue(engineOrNull instanceof ReadOnlyEngine); shards.assertAllEqual(10); @@ -1100,7 +1100,7 @@ public void testSnapshotWhileFailoverIncomplete() throws Exception { CountDownLatch latch = new CountDownLatch(1); doAnswer(ans -> { - final Engine engineOrNull = replicaShard.getEngineOrNull(); + final Engine engineOrNull = getEngine(replicaShard); assertNotNull(engineOrNull); assertTrue(engineOrNull instanceof ReadOnlyEngine); shards.assertAllEqual(10); diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java index 8a221e8ec961f..ba3e54738f024 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java @@ -294,7 +294,7 @@ public void onFailure(Exception e) { } }, ThreadPool.Names.GENERIC, ""); latch.await(); - assertEquals(nextPrimary.getEngine().getClass(), InternalEngine.class); + assertEquals(getEngine(nextPrimary).getClass(), InternalEngine.class); nextPrimary.refresh("test"); oldPrimary.close("demoted", false, false); @@ -409,7 +409,7 @@ public void testTemporaryFilesNotCleanup() throws Exception { .collect(Collectors.toList()); // Step 4. Perform a commit on replica shard. - NRTReplicationEngine engine = (NRTReplicationEngine) indexShard.getEngine(); + NRTReplicationEngine engine = (NRTReplicationEngine) getEngine(indexShard); engine.updateSegments(engine.getSegmentInfosSnapshot().get()); // Step 5. Validate temporary files are not deleted from store.
@@ -605,8 +605,8 @@ private void doPrimaryPromotion(ReplicationGroup shards, int numDocsInFirstBatch oldPrimary = shards.addReplicaWithExistingPath(oldPrimary.shardPath(), oldPrimary.routingEntry().currentNodeId()); shards.recoverReplica(oldPrimary); - assertEquals(NRTReplicationEngine.class, oldPrimary.getEngine().getClass()); - assertEquals(InternalEngine.class, nextPrimary.getEngine().getClass()); + assertEquals(NRTReplicationEngine.class, getEngine(oldPrimary).getClass()); + assertEquals(InternalEngine.class, getEngine(nextPrimary).getClass()); assertDocCounts(nextPrimary, totalDocs, totalDocs); assertEquals(0, nextPrimary.translogStats().estimatedNumberOfOperations()); diff --git a/server/src/test/java/org/opensearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/opensearch/index/shard/ShardGetServiceTests.java index 0f27bc2bd126b..e261a244742cc 100644 --- a/server/src/test/java/org/opensearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/opensearch/index/shard/ShardGetServiceTests.java @@ -66,31 +66,31 @@ public void testGetForUpdate() throws IOException { IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, null); recoverShardFromStore(primary); Engine.IndexResult test = indexDoc(primary, "test", "0", "{\"foo\" : \"bar\"}"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); GetResult testGet = primary.getService().getForUpdate("0", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM); assertFalse(testGet.getFields().containsKey(RoutingFieldMapper.NAME)); assertEquals(new String(testGet.source(), StandardCharsets.UTF_8), "{\"foo\" : \"bar\"}"); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 1); // we refreshed } Engine.IndexResult test1 = indexDoc(primary, "1", "{\"foo\" : \"baz\"}", MediaTypeRegistry.JSON, "foobar"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); GetResult testGet1 = primary.getService().getForUpdate("1", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM); assertEquals(new String(testGet1.source(), StandardCharsets.UTF_8), "{\"foo\" : \"baz\"}"); assertTrue(testGet1.getFields().containsKey(RoutingFieldMapper.NAME)); assertEquals("foobar", testGet1.getFields().get(RoutingFieldMapper.NAME).getValue()); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 1); // we read from the translog } - primary.getEngine().refresh("test"); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + primary.getIndexer().refresh("test"); + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 2); } // now again from the reader Engine.IndexResult test2 = indexDoc(primary, "1", "{\"foo\" : \"baz\"}", MediaTypeRegistry.JSON, "foobar"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); testGet1 = primary.getService().getForUpdate("1", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM);
assertEquals(new String(testGet1.source(), StandardCharsets.UTF_8), "{\"foo\" : \"baz\"}"); assertTrue(testGet1.getFields().containsKey(RoutingFieldMapper.NAME)); @@ -149,41 +149,41 @@ private void runGetFromTranslogWithOptions( IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, null); recoverShardFromStore(primary); Engine.IndexResult test = indexDoc(primary, MapperService.SINGLE_MAPPING_NAME, "0", docToIndex); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); GetResult testGet = primary.getService().getForUpdate("0", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM); assertFalse(testGet.getFields().containsKey(RoutingFieldMapper.NAME)); assertEquals(new String(testGet.source() == null ? new byte[0] : testGet.source(), StandardCharsets.UTF_8), expectedResult); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 1); // we refreshed } Engine.IndexResult test1 = indexDoc(primary, "1", docToIndex, MediaTypeRegistry.JSON, "foobar"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); GetResult testGet1 = primary.getService().getForUpdate("1", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM); assertEquals(new String(testGet1.source() == null ? new byte[0] : testGet1.source(), StandardCharsets.UTF_8), expectedResult); assertTrue(testGet1.getFields().containsKey(RoutingFieldMapper.NAME)); assertEquals("foobar", testGet1.getFields().get(RoutingFieldMapper.NAME).getValue()); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 1); // we read from the translog } - primary.getEngine().refresh("test"); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + primary.getIndexer().refresh("test"); + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 2); } Engine.IndexResult test2 = indexDoc(primary, "2", docToIndex, MediaTypeRegistry.JSON, "foobar"); - assertTrue(primary.getEngine().refreshNeeded()); + assertTrue(primary.getIndexer().refreshNeeded()); GetResult testGet2 = primary.getService() .get("2", new String[] { "foo" }, true, 1, VersionType.INTERNAL, FetchSourceContext.FETCH_SOURCE); assertEquals(new String(testGet2.source() == null ? 
new byte[0] : testGet2.source(), StandardCharsets.UTF_8), expectedResult); assertTrue(testGet2.getFields().containsKey(RoutingFieldMapper.NAME)); assertTrue(testGet2.getFields().containsKey("foo")); assertEquals(expectedFooVal, testGet2.getFields().get("foo").getValue()); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 2); // we read from the translog } - primary.getEngine().refresh("test"); - try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { + primary.getIndexer().refresh("test"); + try (Engine.Searcher searcher = getEngine(primary).acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 3); } diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java index b065456373935..8ba57eadabb5f 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java @@ -437,7 +437,7 @@ public void testShouldFlushAfterPeerRecovery() throws Exception { replica.onSettingsChanged(); shards.recoverReplica(replica); // Make sure the flushing will eventually be completed (eg. `shouldPeriodicallyFlush` is false) - assertBusy(() -> assertThat(getEngine(replica).shouldPeriodicallyFlush(), equalTo(false))); + assertBusy(() -> assertThat(getIndexer(replica).shouldPeriodicallyFlush(), equalTo(false))); boolean softDeletesEnabled = replica.indexSettings().isSoftDeleteEnabled(); assertThat(getTranslog(replica).totalOperations(), equalTo(softDeletesEnabled ? 
0 : numDocs)); shards.assertAllEqual(numDocs); diff --git a/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java b/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java index 024fd8e5e6034..9c687598fca94 100644 --- a/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java @@ -74,7 +74,7 @@ public void setUp() throws Exception { ShardId testShardId = primary.shardId(); - CodecService codecService = new CodecService(null, getEngine(primary).config().getIndexSettings(), null, List.of()); + CodecService codecService = new CodecService(null, getIndexer(primary).config().getIndexSettings(), null, List.of()); String defaultCodecName = codecService.codec(CodecService.DEFAULT_CODEC).getName(); // This mirrors the creation of the ReplicationCheckpoint inside CopyState diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index ae6c5ba87a8ce..42a3d5a25fc09 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -189,6 +189,7 @@ import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.dataformat.DataFormatRegistry; import org.opensearch.index.mapper.MappingTransformerRegistry; import org.opensearch.index.remote.RemoteStorePressureService; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -2083,6 +2084,7 @@ public void onFailure(final Exception e) { ); final BigArrays bigArrays = new BigArrays(new PageCacheRecycler(settings), null, "test"); final MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry(); + final DataFormatRegistry dataFormatRegistry = new IndicesModule(Collections.emptyList()).getDataFormatRegistry(); final SetOnce<RepositoriesService> repositoriesServiceReference = new SetOnce<>(); repositoriesServiceReference.set(repositoriesService); indicesService = new IndicesService( @@ -2104,6 +2106,7 @@ public void onFailure(final Exception e) { ), indexNameExpressionResolver, mapperRegistry, + dataFormatRegistry, namedWriteableRegistry, threadPool, indexScopedSettings, diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index e2fa5b70e5f03..7e236cf911060 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -93,12 +93,16 @@ import org.opensearch.index.cache.query.DisabledQueryCache; import org.opensearch.index.engine.DocIdSeqNoAndSource; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineBackedIndexer; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.EngineTestCase; +import org.opensearch.index.engine.InternalEngine; import org.opensearch.index.engine.InternalEngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.NRTReplicationEngine; import
org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.SourceToParse; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -119,9 +123,11 @@ import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreMetadataLockManager; import org.opensearch.index.translog.InternalTranslogFactory; +import org.opensearch.index.translog.InternalTranslogManager; import org.opensearch.index.translog.RemoteBlobStoreInternalTranslogFactory; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogFactory; +import org.opensearch.index.translog.TranslogManager; import org.opensearch.indices.DefaultRemoteStoreSettings; import org.opensearch.indices.IndicesService; import org.opensearch.indices.breaker.HierarchyCircuitBreakerService; @@ -1060,7 +1066,9 @@ protected void closeShard(IndexShard shard, boolean assertConsistencyBetweenTran if (assertConsistencyBetweenTranslogAndLucene) { assertConsistentHistoryBetweenTranslogAndLucene(shard); } - final Engine engine = shard.getEngineOrNull(); + final Engine engine = shard.getIndexerOrNull() instanceof EngineBackedIndexer engineBackedIndexer + ? engineBackedIndexer.getEngine() + : null; if (engine != null) { EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber(engine); } @@ -1350,7 +1358,7 @@ public static Set<String> getShardDocUIDs(final IndexShard shard) throws IOExcep } public static List<DocIdSeqNoAndSource> getDocIdAndSeqNos(final IndexShard shard) throws IOException { - return EngineTestCase.getDocIds(shard.getEngine(), true); + return EngineTestCase.getDocIds(getEngine(shard), true); } protected void assertDocCount(IndexShard shard, int docDount) throws IOException { @@ -1367,7 +1375,9 @@ public static void assertConsistentHistoryBetweenTranslogAndLucene(IndexShard sh if (shard.state() != IndexShardState.POST_RECOVERY && shard.state() != IndexShardState.STARTED) { return; } - final Engine engine = shard.getEngineOrNull(); + final Engine engine = shard.getIndexerOrNull() instanceof EngineBackedIndexer engineBackedIndexer + ?
engineBackedIndexer.getEngine() + : null; if (engine != null) { EngineTestCase.assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine); } @@ -1548,11 +1558,27 @@ protected String snapshotShard(final IndexShard shard, final Snapshot snapshot, * Helper method to access (package-protected) engine from tests */ public static Engine getEngine(IndexShard indexShard) { - return indexShard.getEngine(); + return ((EngineBackedIndexer) indexShard.getIndexer()).getEngine(); + } + + /** + * Helper method to access the shard's indexer from tests + */ + public static Indexer getIndexer(IndexShard indexShard) { + return indexShard.getIndexer(); } public static Translog getTranslog(IndexShard shard) { - return EngineTestCase.getTranslog(getEngine(shard)); + Indexer indexer = getIndexer(shard); + Engine engine = getEngine(shard); + assert engine instanceof InternalEngine || engine instanceof NRTReplicationEngine + : "only InternalEngines or NRTReplicationEngines have translogs, got: " + engine.getClass(); + indexer.ensureOpen(); + TranslogManager translogManager = indexer.translogManager(); + assert translogManager instanceof InternalTranslogManager : "only InternalTranslogManager has a translog, got: " + + translogManager.getClass(); + InternalTranslogManager internalTranslogManager = (InternalTranslogManager) translogManager; + return internalTranslogManager.getTranslog(); } public static ReplicationTracker getReplicationTracker(IndexShard indexShard) {