diff --git a/.github/workflows/detect-breaking-change.yml b/.github/workflows/detect-breaking-change.yml index 3d3fe3664d30d..c137667384a7f 100644 --- a/.github/workflows/detect-breaking-change.yml +++ b/.github/workflows/detect-breaking-change.yml @@ -15,7 +15,7 @@ jobs: with: cache-disabled: true arguments: japicmp - gradle-version: 9.1.0 + gradle-version: 9.4.1 build-root-directory: server - if: failure() run: cat server/build/reports/java-compatibility/report.txt diff --git a/.github/workflows/sandbox-check.yml b/.github/workflows/sandbox-check.yml new file mode 100644 index 0000000000000..fc710f499fb89 --- /dev/null +++ b/.github/workflows/sandbox-check.yml @@ -0,0 +1,43 @@ +name: Sandbox Check +on: + push: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + sandbox-check: + if: github.repository == 'opensearch-project/OpenSearch' + runs-on: ubuntu-latest + continue-on-error: true + timeout-minutes: 60 + steps: + - uses: actions/checkout@v6 + - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Set up JDK 25 + uses: actions/setup-java@v5 + with: + java-version: 25 + distribution: temurin + cache: gradle + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + - name: Install protobuf compiler + run: sudo apt-get update && sudo apt-get install -y protobuf-compiler + - name: Run sandbox check + run: ./gradlew check -p sandbox -Dsandbox.enabled=true + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: sandbox-test-results + path: sandbox/**/build/reports/tests/ + retention-days: 7 diff --git a/.gitignore b/.gitignore index 1f2fe4ab3a1a8..ddcf6afd46b41 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,4 @@ testfixtures_shared/ doc-tools/missing-doclet/bin/ **/Cargo.lock /sandbox/plugins/analytics-backend-datafusion/target/ +/sandbox/libs/dataformat-native/rust/target diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 70214aa7bae39..0e310e12fd930 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -138,7 +138,7 @@ We have a lot of mechanisms to help expedite towards an accepted PR. Here are so - `@opensearch.internal`: Marks internal classes subject to rapid changes. - `@opensearch.api`: Marks public-facing API classes with backward compatibility guarantees. - `@opensearch.experimental`: Indicates rapidly changing [experimental code](./DEVELOPER_GUIDE.md#experimental-development). -5. *Employ sandbox for significant core changes*: Any new features or enhancements that make changes to core classes (e.g., search phases, codecs, or specialized lucene APIs) are more likely to. be merged if they are sandboxed. This can only be enabled on the java CLI (`-Dsandbox.enabled=true`). +5. *Employ sandbox for significant core changes*: Any new features or enhancements that make changes to core classes (e.g., search phases, codecs, or specialized lucene APIs) are more likely to be merged if they are sandboxed. Sandbox is disabled by default and can be enabled on the Java CLI with `-Dsandbox.enabled=true`. 6. 
*Micro-benchmark critical path*: This is a lesser known mechanism, but if you have critical path changes you're afraid will impact performance (the changes touch the garbage collector, heap, direct memory, or CPU) then including a [microbenchmark](https://github.com/opensearch-project/OpenSearch/tree/main/benchmarks) with your PR (and jfr or flamegraph results in the description) is a *GREAT IDEA* and will help expedite the review process. 7. *Test rigorously*: Ensure thorough testing ([OpenSearchTestCase](./test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java) for unit tests, [OpenSearchIntegTestCase](./test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java) for integration & cluster tests, [OpenSearchRestTestCase](./test/framework/src/main/java/org/opensearch/test/rest/OpenSearchRestTestCase.java) for testing REST endpoint interfaces, and yaml tests with [ClientYamlTestSuiteIT](./rest-api-spec/src/yamlRestTest/java/org/opensearch/test/rest/ClientYamlTestSuiteIT.java) for REST integration tests) diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index 1bf9458c940ad..f64fdabc6dd64 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -313,11 +313,11 @@ Another example is the `discovery-gce` plugin. It is *vital* to folks running in ### `sandbox` -This is where the community can add experimental features in to OpenSearch. There are three directories inside the sandbox - `libs`, `modules` and `plugins` - which mirror the subdirectories in the project root and have the same guidelines for deciding on where a new feature goes. The artifacts from `libs` and `modules` will be automatically included in the **snapshot** distributions. Once a certain feature is deemed worthy to be included in the OpenSearch release, it will be promoted to the corresponding subdirectory in the project root. **Note**: The sandbox code do not have any other guarantees such as backwards compatibility or long term support and can be removed at any time. +This is where the community can add experimental features into OpenSearch. There are three directories inside the sandbox - `libs`, `modules` and `plugins` - which mirror the subdirectories in the project root and have the same guidelines for deciding on where a new feature goes. The artifacts from `libs` and `modules` can be included in the **snapshot** distributions when the sandbox is enabled. Once a feature is deemed worthy of inclusion in an OpenSearch release, it will be promoted to the corresponding subdirectory in the project root. **Note**: The sandbox code does not have any other guarantees such as backwards compatibility or long-term support and can be removed at any time. -To exclude the modules from snapshot distributions, use the `sandbox.enabled` system property. +To include sandbox modules in snapshot distributions, use the `sandbox.enabled` system property. 
- ./gradlew assemble -Dsandbox.enabled=false + ./gradlew assemble -Dsandbox.enabled=true ### `qa` diff --git a/buildSrc/src/main/java/org/opensearch/gradle/docker/DockerSupportService.java b/buildSrc/src/main/java/org/opensearch/gradle/docker/DockerSupportService.java index 66048e29356ab..e389eed437a1b 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/docker/DockerSupportService.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/docker/DockerSupportService.java @@ -34,6 +34,7 @@ import org.apache.tools.ant.taskdefs.condition.Os; import org.opensearch.gradle.Version; import org.opensearch.gradle.info.BuildParams; +import org.opensearch.gradle.util.ExecutableUtils; import org.gradle.api.GradleException; import org.gradle.api.logging.Logger; import org.gradle.api.logging.Logging; @@ -50,7 +51,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -65,24 +65,20 @@ */ public abstract class DockerSupportService implements BuildService { - private static Logger LOGGER = Logging.getLogger(DockerSupportService.class); - // Defines the possible locations of the Docker CLI. These will be searched in order. - private static String[] DOCKER_BINARIES_UNIX = { "/usr/bin/docker", "/usr/local/bin/docker" }; + private static final Logger LOGGER = Logging.getLogger(DockerSupportService.class); - private static String[] DOCKER_BINARIES_WINDOWS = { - System.getenv("PROGRAMFILES") + "\\Docker\\Docker\\resources\\bin\\docker.exe", - System.getenv("SystemRoot") + "\\System32\\docker.exe" /* Github Actions */ }; + private static final String DOCKER_FILENAME = Os.isFamily(Os.FAMILY_WINDOWS) ? "docker.exe" : "docker"; - private static String[] DOCKER_BINARIES = Os.isFamily(Os.FAMILY_WINDOWS) ? DOCKER_BINARIES_WINDOWS : DOCKER_BINARIES_UNIX; + private static final String DOCKER_COMPOSE_FILENAME = Os.isFamily(Os.FAMILY_WINDOWS) ? "docker-compose.exe" : "docker-compose"; - private static String[] DOCKER_COMPOSE_BINARIES_UNIX = { "/usr/local/bin/docker-compose", "/usr/bin/docker-compose" }; + private static final String[] DEFAULT_PATH_UNIX = { "/usr/bin", "/usr/local/bin" }; - private static String[] DOCKER_COMPOSE_BINARIES_WINDOWS = { - System.getenv("PROGRAMFILES") + "\\Docker\\Docker\\resources\\bin\\docker-compose.exe" }; + private static final String[] DEFAULT_PATH_WINDOWS = { + System.getenv("PROGRAMFILES") + "\\Docker\\Docker\\resources\\bin", + System.getenv("SystemRoot") + "\\System32" /* GitHub Actions */ + }; - private static String[] DOCKER_COMPOSE_BINARIES = Os.isFamily(Os.FAMILY_WINDOWS) - ? DOCKER_COMPOSE_BINARIES_WINDOWS - : DOCKER_COMPOSE_BINARIES_UNIX; + private static final String[] DEFAULT_PATH = Os.isFamily(Os.FAMILY_WINDOWS) ? DEFAULT_PATH_WINDOWS : DEFAULT_PATH_UNIX; private static final Version MINIMUM_DOCKER_VERSION = Version.fromString("17.05.0"); @@ -177,10 +173,11 @@ void failIfDockerUnavailable(List tasks) { // No Docker binary was located if (availability.path == null) { + final String[] dockerPaths = ExecutableUtils.mergePaths(DEFAULT_PATH, ExecutableUtils.getPathEnv()); final String message = String.format( Locale.ROOT, "Docker (checked [%s]) is required to run the following task%s: \n%s", - String.join(", ", DOCKER_BINARIES), + String.join(", ", dockerPaths), tasks.size() > 1 ? 
"s" : "", String.join("\n", tasks) ); @@ -290,15 +287,14 @@ static Map parseOsRelease(final List osReleaseLines) { } /** - * Searches the entries in {@link #DOCKER_BINARIES} for the Docker CLI. This method does + * Searches for the Docker CLI in the system PATH and default locations. This method does * not check whether the Docker installation appears usable, see {@link #getDockerAvailability()} * instead. * * @return the path to a CLI, if available. */ private Optional getDockerPath() { - // Check if the Docker binary exists - return Arrays.asList(DOCKER_BINARIES).stream().filter(path -> new File(path).exists()).findFirst(); + return ExecutableUtils.findExecutableInKnownPaths(DOCKER_FILENAME, DEFAULT_PATH); } private void throwDockerRequiredException(final String message) { @@ -403,7 +399,7 @@ public boolean isDockerComposeAvailable() { /** * Marker interface for Docker Compose availability */ - private interface DockerComposeAvailability { + public interface DockerComposeAvailability { /** * Detects Docker Compose V1/V2 availability */ @@ -411,39 +407,62 @@ private static Optional detect(ExecOperations execOpe Optional composePath = getDockerComposePath(); if (composePath.isPresent()) { if (runCommand(execOperations, composePath.get(), "version").isSuccess()) { - return Optional.of(new DockerComposeV1Availability()); + return Optional.of(new DockerComposeV1Availability(composePath.get())); } } if (runCommand(execOperations, dockerPath, "compose", "version").isSuccess()) { - return Optional.of(new DockerComposeV2Availability()); + return Optional.of(new DockerComposeV2Availability(dockerPath)); } return Optional.empty(); } /** - * Searches the entries in {@link #DOCKER_COMPOSE_BINARIES} for the Docker Compose CLI. This method does + * Searches the entries in env variable PATH with fallback to {@link #DEFAULT_PATH} for the Docker Compose CLI. This method does * not check whether the installation appears usable, see {@link #getDockerAvailability()} instead. * * @return the path to a CLI, if available. */ private static Optional getDockerComposePath() { - // Check if the Docker binary exists - return Arrays.asList(DOCKER_COMPOSE_BINARIES).stream().filter(path -> new File(path).exists()).findFirst(); + return ExecutableUtils.findExecutableInKnownPaths(DOCKER_COMPOSE_FILENAME, DEFAULT_PATH); } + /** + * The path to the Docker CLI, or null + */ + public String getPath(); } /** * Docker Compose V1 availability */ - public static class DockerComposeV1Availability implements DockerComposeAvailability {} + public static class DockerComposeV1Availability implements DockerComposeAvailability { + private final String path; + + DockerComposeV1Availability(String path) { + this.path = path; + } + + public String getPath() { + return this.path; + } + } /** * Docker Compose V2 availability */ - public static class DockerComposeV2Availability implements DockerComposeAvailability {} + public static class DockerComposeV2Availability implements DockerComposeAvailability { + private final String path; + + DockerComposeV2Availability(String path) { + this.path = path; + } + + public String getPath() { + return this.path; + } + } /** * This class models the result of running a command. It captures the exit code, standard output and standard error. 
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testfixtures/TestFixturesPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/testfixtures/TestFixturesPlugin.java index c3b870e4ce5ad..6d5f5afc7e2f8 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testfixtures/TestFixturesPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testfixtures/TestFixturesPlugin.java @@ -39,10 +39,10 @@ import com.avast.gradle.dockercompose.tasks.ComposePull; import com.avast.gradle.dockercompose.tasks.ComposeUp; -import org.apache.tools.ant.taskdefs.condition.Os; import org.opensearch.gradle.SystemPropertyCommandLineArgumentProvider; import org.opensearch.gradle.docker.DockerSupportPlugin; import org.opensearch.gradle.docker.DockerSupportService; +import org.opensearch.gradle.docker.DockerSupportService.DockerAvailability; import org.opensearch.gradle.docker.DockerSupportService.DockerComposeV2Availability; import org.opensearch.gradle.info.BuildParams; import org.opensearch.gradle.precommit.TestingConventionsTasks; @@ -68,9 +68,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.Files; -import java.util.Arrays; import java.util.Collections; -import java.util.Optional; import java.util.function.BiConsumer; public class TestFixturesPlugin implements Plugin { @@ -79,15 +77,6 @@ public class TestFixturesPlugin implements Plugin { private static final String DOCKER_COMPOSE_THROTTLE = "dockerComposeThrottle"; static final String DOCKER_COMPOSE_YML = "docker-compose.yml"; - private static String[] DOCKER_COMPOSE_BINARIES_UNIX = { "/usr/local/bin/docker-compose", "/usr/bin/docker-compose" }; - - private static String[] DOCKER_COMPOSE_BINARIES_WINDOWS = { - System.getenv("PROGRAMFILES") + "\\Docker\\Docker\\resources\\bin\\docker-compose.exe" }; - - private static String[] DOCKER_COMPOSE_BINARIES = Os.isFamily(Os.FAMILY_WINDOWS) - ? DOCKER_COMPOSE_BINARIES_WINDOWS - : DOCKER_COMPOSE_BINARIES_UNIX; - @Inject protected FileSystemOperations getFileSystemOperations() { throw new UnsupportedOperationException(); @@ -166,12 +155,11 @@ public void execute(Task task) { final Integer timeout = ext.has("dockerComposeHttpTimeout") ? (Integer) ext.get("dockerComposeHttpTimeout") : 120; composeExtension.getEnvironment().put("COMPOSE_HTTP_TIMEOUT", timeout); - Optional dockerCompose = Arrays.asList(DOCKER_COMPOSE_BINARIES) - .stream() - .filter(path -> project.file(path).exists()) - .findFirst(); + final DockerAvailability dockerAvailability = dockerSupport.get().getDockerAvailability(); + if (dockerAvailability.isAvailable && dockerAvailability.isDockerComposeAvailable()) { + composeExtension.getExecutable().set(dockerAvailability.dockerComposeAvailability.getPath()); + } - composeExtension.getExecutable().set(dockerCompose.isPresent() ? dockerCompose.get() : "/usr/bin/docker"); composeExtension.getUseDockerComposeV2() .set(dockerSupport.get().getDockerAvailability().dockerComposeAvailability instanceof DockerComposeV2Availability); diff --git a/buildSrc/src/main/java/org/opensearch/gradle/util/ExecutableUtils.java b/buildSrc/src/main/java/org/opensearch/gradle/util/ExecutableUtils.java new file mode 100644 index 0000000000000..b0f1786d15e46 --- /dev/null +++ b/buildSrc/src/main/java/org/opensearch/gradle/util/ExecutableUtils.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gradle.util; + +import org.apache.tools.ant.taskdefs.condition.Os; + +import java.io.File; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Utilities for reading and parsing the PATH environment variable and for finding executable files. + */ +public class ExecutableUtils { + private static String[] cachedPathEnv; + + /** + * Normalizes a path by trimming whitespace and removing a trailing "/" or "\" from the end of the path. + * @param path Path string to normalize. + * @return Normalized path. + */ + private static String normalizePath(String path) { + String trimmedPath = path.trim(); + if (trimmedPath.length() > 1 && trimmedPath.endsWith(getFileSeparator())) { + return trimmedPath.substring(0, trimmedPath.length() - 1); + } + + return trimmedPath; + } + + private ExecutableUtils() {} + + private static String getFileSeparator() { + return System.getProperty("file.separator", Os.isFamily(Os.FAMILY_WINDOWS) ? "\\" : "/"); + } + + private static String getPathSeparator() { + return System.getProperty("path.separator", Os.isFamily(Os.FAMILY_WINDOWS) ? ";" : ":"); + } + + /** + * Parses a path string into an array of individual paths, normalizing them and removing empty entries. + * @param pathString Path string to parse. + * @return An array of individual paths. + */ + public static String[] parsePathString(String pathString) { + String[] pathArray = pathString.split(getPathSeparator()); + return Arrays.stream(pathArray).filter(path -> !path.isEmpty()).map(path -> normalizePath(path)).toArray(String[]::new); + + } + + /** + * @return The PATH environment variable as an array of normalized paths. + */ + public static String[] getPathEnv() { + if (cachedPathEnv != null) { + return cachedPathEnv; + } + + String pathEnvString = System.getenv("PATH"); + + cachedPathEnv = pathEnvString != null ? parsePathString(pathEnvString) : new String[0]; + return cachedPathEnv; + } + + /** + * Merges two arrays of paths, removing duplicates and keeping the order. It expects the provided paths to be normalized. + * @param path1 First array of normalized paths. + * @param path2 Second array of normalized paths. + * @return An array of merged paths. + */ + public static String[] mergePaths(String[] path1, String[] path2) { + return Stream.concat(Arrays.stream(path1), Arrays.stream(path2)) + // LinkedHashSet removes duplicates and keeps the order + .collect(Collectors.toCollection(LinkedHashSet::new)) + .toArray(String[]::new); + } + + /** + * Finds an executable with the given filename in the paths defined by the PATH environment variable. + * @param executableFileName The filename of the executable (including the extension, if any). + * @return An optional path to the found executable. + */ + public static Optional<String> findExecutable(String executableFileName) { + return findExecutable(executableFileName, getPathEnv()); + } + + /** + * Finds an executable with the given filename in the provided paths. + * @param executableFileName The filename of the executable (including the extension, if any). + * @param path Array of paths in which to look for the executable. + * @return An optional path to the found executable. 
+ */ + public static Optional<String> findExecutable(String executableFileName, String[] path) { + return Arrays.stream(path) + .map(p -> Path.of(p, executableFileName)) + .map(Path::toFile) + .filter(File::exists) + .filter(File::canExecute) + .findFirst() + .map(File::toString); + } + + /** + * Finds an executable with the given filename in the known paths, with fallback to the paths defined in the PATH environment variable. + * @param executableFileName The filename of the executable (including the extension, if any). + * @param knownPath Array of paths in which to look for the executable first, before falling back to the system PATH. + * @return An optional path to the found executable. + */ + public static Optional<String> findExecutableInKnownPaths(String executableFileName, String[] knownPath) { + String[] mergedPath = mergePaths(knownPath, getPathEnv()); + + return findExecutable(executableFileName, mergedPath); + } +} diff --git a/buildSrc/src/test/java/org/opensearch/gradle/util/ExecutableUtilsTests.java b/buildSrc/src/test/java/org/opensearch/gradle/util/ExecutableUtilsTests.java new file mode 100644 index 0000000000000..437c229dc8b5f --- /dev/null +++ b/buildSrc/src/test/java/org/opensearch/gradle/util/ExecutableUtilsTests.java @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gradle.util; + +import org.opensearch.gradle.test.GradleUnitTestCase; + +public class ExecutableUtilsTests extends GradleUnitTestCase { + + /** + * Helper method to set system properties for a Unix environment and restore them after test execution. + */ + private void withUnixSystemProperties(Runnable test) { + String originalFileSeparator = System.getProperty("file.separator"); + String originalPathSeparator = System.getProperty("path.separator"); + try { + System.setProperty("path.separator", ":"); + System.setProperty("file.separator", "/"); + test.run(); + } finally { + System.setProperty("file.separator", originalFileSeparator); + System.setProperty("path.separator", originalPathSeparator); + } + } + + /** + * Helper method to set system properties for a Windows environment and restore them after test execution. 
+ */ + private void withWindowsSystemProperties(Runnable test) { + String originalFileSeparator = System.getProperty("file.separator"); + String originalPathSeparator = System.getProperty("path.separator"); + try { + System.setProperty("path.separator", ";"); + System.setProperty("file.separator", "\\"); + test.run(); + } finally { + System.setProperty("file.separator", originalFileSeparator); + System.setProperty("path.separator", originalPathSeparator); + } + } + + public void testParsePathStringUnix() { + withUnixSystemProperties(() -> { + String pathString = "/usr/bin:/usr/local/bin:/home/user/bin"; + String[] result = ExecutableUtils.parsePathString(pathString); + + assertEquals(3, result.length); + assertEquals("/usr/bin", result[0]); + assertEquals("/usr/local/bin", result[1]); + assertEquals("/home/user/bin", result[2]); + }); + } + + public void testParsePathStringWindows() { + withWindowsSystemProperties(() -> { + String pathString = "C:\\Windows\\System32;C:\\Program Files\\bin;C:\\Users\\user\\bin"; + String[] result = ExecutableUtils.parsePathString(pathString); + + assertEquals(3, result.length); + assertEquals("C:\\Windows\\System32", result[0]); + assertEquals("C:\\Program Files\\bin", result[1]); + assertEquals("C:\\Users\\user\\bin", result[2]); + }); + } + + public void testParsePathSinglePathSeparator() { + withUnixSystemProperties(() -> { + String[] paths = { "/" }; + String pathString = String.join(":", paths); + String[] result = ExecutableUtils.parsePathString(pathString); + + assertEquals(1, result.length); + assertEquals("/", result[0]); + }); + } + + public void testParsePathStringWithEmptyEntries() { + withUnixSystemProperties(() -> { + String pathString = "/usr/bin::/usr/local/bin:::/home/user/bin:"; + String[] result = ExecutableUtils.parsePathString(pathString); + + // Empty entries should be filtered out + assertEquals(3, result.length); + assertEquals("/usr/bin", result[0]); + assertEquals("/usr/local/bin", result[1]); + assertEquals("/home/user/bin", result[2]); + }); + } + + public void testParsePathStringWithTrailingSlashes() { + withUnixSystemProperties(() -> { + String pathString = "/usr/bin/:/usr/local/bin/:/home/user/bin/"; + String[] result = ExecutableUtils.parsePathString(pathString); + + // Trailing slashes should be removed + assertEquals(3, result.length); + assertEquals("/usr/bin", result[0]); + assertEquals("/usr/local/bin", result[1]); + assertEquals("/home/user/bin", result[2]); + }); + } + + public void testParsePathStringWindowsWithTrailingBackslashes() { + withWindowsSystemProperties(() -> { + String pathString = "C:\\Windows\\System32\\;C:\\Program Files\\bin\\"; + String[] result = ExecutableUtils.parsePathString(pathString); + + assertEquals(2, result.length); + assertEquals("C:\\Windows\\System32", result[0]); + assertEquals("C:\\Program Files\\bin", result[1]); + }); + } + + public void testParsePathStringWithWhitespace() { + withUnixSystemProperties(() -> { + String pathString = " /usr/bin : /usr/local/bin : /home/user/bin "; + String[] result = ExecutableUtils.parsePathString(pathString); + + // Whitespace should be trimmed + assertEquals(3, result.length); + assertEquals("/usr/bin", result[0]); + assertEquals("/usr/local/bin", result[1]); + assertEquals("/home/user/bin", result[2]); + }); + } + + public void testParsePathStringEmpty() { + withUnixSystemProperties(() -> { + String pathString = ""; + String[] result = ExecutableUtils.parsePathString(pathString); + + assertEquals(0, result.length); + }); + } + + public void 
testParsePathStringOnlyDelimiters() { + withUnixSystemProperties(() -> { + String pathString = ":::"; + String[] result = ExecutableUtils.parsePathString(pathString); + + assertEquals(0, result.length); + }); + } + + public void testMergePaths() { + withUnixSystemProperties(() -> { + String[] path1 = { "/usr/bin", "/usr/local/bin" }; + String[] path2 = { "/home/user/bin", "/opt/bin" }; + + String[] result = ExecutableUtils.mergePaths(path1, path2); + + assertEquals(4, result.length); + assertEquals("/usr/bin", result[0]); + assertEquals("/usr/local/bin", result[1]); + assertEquals("/home/user/bin", result[2]); + assertEquals("/opt/bin", result[3]); + }); + } + + public void testMergePathsWithDuplicates() { + withUnixSystemProperties(() -> { + String[] path1 = { "/usr/bin", "/usr/local/bin", "/home/user/bin" }; + String[] path2 = { "/usr/local/bin", "/opt/bin", "/usr/bin" }; + + String[] result = ExecutableUtils.mergePaths(path1, path2); + + // Duplicates should be removed, order preserved from path1 first + assertEquals(4, result.length); + assertEquals("/usr/bin", result[0]); + assertEquals("/usr/local/bin", result[1]); + assertEquals("/home/user/bin", result[2]); + assertEquals("/opt/bin", result[3]); + }); + } +} diff --git a/distribution/build.gradle b/distribution/build.gradle index 8e9f4d4f48fd4..9d2c58207c421 100644 --- a/distribution/build.gradle +++ b/distribution/build.gradle @@ -241,8 +241,8 @@ project.rootProject.subprojects.findAll { it.parent.path == ':modules' }.each { // copy all sandbox modules if the distribution is a snapshot if (VersionProperties.isOpenSearchSnapshot()) { Properties sysProps = System.getProperties(); - // setting this property to false will exclude the sandbox modules from the distribution - final String enableSandbox = sysProps.getProperty("sandbox.enabled", "true"); + // setting this property to true will include the sandbox modules in the distribution + final String enableSandbox = sysProps.getProperty("sandbox.enabled", "false"); if(sysProps != null && enableSandbox == "true") { tasks.withType(NoticeTask).configureEach { project(':sandbox:libs').subprojects.each { Project lib -> diff --git a/gradle/missing-javadoc.gradle b/gradle/missing-javadoc.gradle index b84a7fa030487..1261d7464c103 100644 --- a/gradle/missing-javadoc.gradle +++ b/gradle/missing-javadoc.gradle @@ -166,14 +166,17 @@ configure([ } } -configure([ +// Core projects — always present +def javadocProjects = [ project(":libs:opensearch-common"), project(":libs:opensearch-core"), project(":libs:opensearch-compress"), - project(":server"), - project(":sandbox:libs:native-bridge-spi"), - project(":sandbox:plugins:parquet-data-format") -]) { + project(":server") +] +// Sandbox projects — only present when -Dsandbox.enabled=true +rootProject.allprojects.findAll { it.path.startsWith(':sandbox:') }.each { javadocProjects.add(it) } + +configure(javadocProjects) { project.tasks.withType(MissingJavadocTask) { // TODO: bump to variable missing level after increasing javadoc coverage javadocMissingLevel = "class" diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/RankFeaturesFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/RankFeaturesFieldMapper.java index 8c2fc705c4b52..108b1522a9ac4 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/RankFeaturesFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/RankFeaturesFieldMapper.java @@ -209,6 +209,11 @@ protected void 
parseCreateField(ParseContext context) { throw new AssertionError("parse is implemented directly"); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + throw new AssertionError("parse is implemented directly"); + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java index 66bde201805ee..72d53ee7ed82f 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java @@ -451,7 +451,36 @@ protected ScaledFloatFieldMapper clone() { @Override protected void parseCreateField(ParseContext context) throws IOException { + Long scaledValue = parseScaledValue(context); + if (scaledValue == null) { + return; + } + + List fields = NumberFieldMapper.NumberType.LONG.createFields( + fieldType().name(), + scaledValue, + indexed, + hasDocValues, + skiplist, + stored + ); + context.doc().addAll(fields); + + if (hasDocValues == false && (indexed || stored)) { + createFieldNamesField(context); + } + } + + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + Long scaledValue = parseScaledValue(context); + if (scaledValue == null) { + return; + } + context.documentInput().addField(fieldType(), scaledValue); + } + private Long parseScaledValue(ParseContext context) throws IOException { XContentParser parser = context.parser(); Object value; Number numericValue = null; @@ -466,7 +495,7 @@ protected void parseCreateField(ParseContext context) throws IOException { numericValue = parse(parser, coerce.value()); } catch (IllegalArgumentException | JsonParseException e) { if (ignoreMalformed.value()) { - return; + return null; } else { throw e; } @@ -479,7 +508,7 @@ protected void parseCreateField(ParseContext context) throws IOException { } if (value == null) { - return; + return null; } if (numericValue == null) { @@ -489,27 +518,13 @@ protected void parseCreateField(ParseContext context) throws IOException { double doubleValue = numericValue.doubleValue(); if (Double.isFinite(doubleValue) == false) { if (ignoreMalformed.value()) { - return; + return null; } else { // since we encode to a long, we have no way to carry NaNs and infinities throw new IllegalArgumentException("[scaled_float] only supports finite values, but got [" + doubleValue + "]"); } } - long scaledValue = Math.round(doubleValue * scalingFactor); - - List fields = NumberFieldMapper.NumberType.LONG.createFields( - fieldType().name(), - scaledValue, - indexed, - hasDocValues, - skiplist, - stored - ); - context.doc().addAll(fields); - - if (hasDocValues == false && (indexed || stored)) { - createFieldNamesField(context); - } + return Math.round(doubleValue * scalingFactor); } static Double parse(Object value) { diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapper.java index f08815ebbbd1e..ead72b615200a 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapper.java @@ -489,6 +489,11 @@ protected void 
parseCreateField(ParseContext context) { throw new UnsupportedOperationException(); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) { + throw new UnsupportedOperationException(); + } + @Override protected void mergeOptions(FieldMapper other, List conflicts) { @@ -525,6 +530,11 @@ protected void parseCreateField(ParseContext context) { throw new UnsupportedOperationException(); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) { + throw new UnsupportedOperationException(); + } + @Override protected void mergeOptions(FieldMapper other, List conflicts) { @@ -650,7 +660,7 @@ public SearchAsYouTypeFieldMapper( @Override protected void parseCreateField(ParseContext context) throws IOException { - final String value = context.externalValueSet() ? context.externalValue().toString() : context.parser().textOrNull(); + final String value = extractValue(context); if (value == null) { return; } @@ -665,6 +675,19 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final String value = extractValue(context); + if (value == null) { + return; + } + context.documentInput().addField(fieldType(), value); + } + + private String extractValue(ParseContext context) throws IOException { + return context.externalValueSet() ? context.externalValue().toString() : context.parser().textOrNull(); + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/TokenCountFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/TokenCountFieldMapper.java index 929a9890a9ec9..9dcbac8edc393 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/TokenCountFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/TokenCountFieldMapper.java @@ -167,6 +167,27 @@ protected TokenCountFieldMapper( @Override protected void parseCreateField(ParseContext context) throws IOException { + final int tokenCount = parseTokenCount(context); + if (tokenCount == Integer.MIN_VALUE) { + return; + } + + context.doc() + .addAll( + NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, skiplist, store) + ); + } + + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final int tokenCount = parseTokenCount(context); + if (tokenCount == Integer.MIN_VALUE) { + return; + } + context.documentInput().addField(fieldType(), tokenCount); + } + + private int parseTokenCount(ParseContext context) throws IOException { final String value; if (context.externalValueSet()) { value = context.externalValue().toString(); @@ -175,20 +196,13 @@ protected void parseCreateField(ParseContext context) throws IOException { } if (value == null && nullValue == null) { - return; + return Integer.MIN_VALUE; } - final int tokenCount; if (value == null) { - tokenCount = nullValue; - } else { - tokenCount = countPositions(analyzer, name(), value, enablePositionIncrements); + return nullValue; } - - context.doc() - .addAll( - NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, skiplist, store) - ); + return countPositions(analyzer, name(), value, enablePositionIncrements); } /** diff --git 
a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeatureFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeatureFieldMapperTests.java index fee9471444c19..77d1ad63a7964 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeatureFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeatureFieldMapperTests.java @@ -38,6 +38,8 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.plugins.Plugin; @@ -157,4 +159,23 @@ public void testRejectMultiValuedFields() throws MapperParsingException, IOExcep e.getCause().getMessage() ); } + + public void testParseCreateFieldForPluggableFormat() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + RankFeatureFieldMapper fieldMapper = (RankFeatureFieldMapper) mapper.mappers().getMapper("field"); + assertNotNull(fieldMapper); + assertEquals("rank_feature", fieldMapper.typeName()); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatRankFeatureThrows() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> mapper.parse(source(b -> b.field("field", 10)), docInput) + ); + assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class)); + } } diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeaturesFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeaturesFieldMapperTests.java index b95572835e612..54189c0fa696a 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeaturesFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/RankFeaturesFieldMapperTests.java @@ -155,4 +155,18 @@ public void testRejectMultiValuedFields() throws MapperParsingException, IOExcep e.getCause().getMessage() ); } + + public void testParseCreateFieldForPluggableFormat() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + RankFeaturesFieldMapper fieldMapper = (RankFeaturesFieldMapper) mapper.mappers().getMapper("field"); + assertNotNull(fieldMapper); + assertEquals("rank_features", fieldMapper.typeName()); + } + + public void testPluggableDataFormatRankFeaturesThrows() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + RankFeaturesFieldMapper rfMapper = (RankFeaturesFieldMapper) mapper.mappers().getMapper("field"); + expectThrows(AssertionError.class, () -> rfMapper.parseCreateFieldForPluggableFormat(null)); + } + } diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java index 08354f786bac8..a1cc0efb6272a 100644 --- 
a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java @@ -44,7 +44,9 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.store.Directory; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -524,4 +526,83 @@ public void testSkiplistParameter() throws IOException { ); assertThat(e.getMessage(), containsString("Failed to parse value [invalid] as only [true] or [false] are allowed")); } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatScaledFloatValue() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "scaled_float").field("scaling_factor", 100).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", 3.14)), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Expected scaled float field to be captured", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatScaledFloatNullSkipped() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "scaled_float").field("scaling_factor", 100).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no field entry for null value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: scaled float value + assertScaledFloatLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "scaled_float").field("scaling_factor", 100).endObject()), + b -> b.field("field", 3.14), + "field", + true + ); + + // Scenario 2: null value — no field produced + assertScaledFloatLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "scaled_float").field("scaling_factor", 100).endObject()), + b -> b.nullField("field"), + "field", + false + ); + } + + private void assertScaledFloatLuceneAndPluggablePathsEquivalent( + Settings pluggableSettings, + XContentBuilder mappingBuilder, + CheckedConsumer sourceBuilder, + String fieldName, + boolean expectField + ) throws IOException { + // Lucene path + DocumentMapper luceneMapper = 
createDocumentMapper(mappingBuilder); + ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder)); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName); + + // Pluggable path + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(sourceBuilder), docInput); + + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName)); + + if (!expectField) { + assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length); + assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField); + } else { + assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0); + assertTrue("Pluggable path should capture field '" + fieldName + "'", pluggableHasField); + } + } } diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapperTests.java index d218f89ff154d..ae7dbb12c8cd8 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/SearchAsYouTypeFieldMapperTests.java @@ -52,6 +52,8 @@ import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; @@ -769,4 +771,116 @@ private static PrefixFieldMapper getPrefixFieldMapper(DocumentMapper defaultMapp assertThat(mapper, instanceOf(PrefixFieldMapper.class)); return (PrefixFieldMapper) mapper; } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatSearchAsYouTypeValue() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "hello world")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("hello world")); + assertTrue("Expected search_as_you_type value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatSearchAsYouTypeNullSkipped() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> 
e.getKey().name().equals("field")); + assertFalse("Expected no field entry for null value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatSearchAsYouTypeExternalValue() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, mapping(b -> { + b.startObject("text_field"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("sayt").field("type", "search_as_you_type").endObject(); + b.endObject(); + b.endObject(); + })); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("text_field", "external_sayt")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("text_field.sayt") && e.getValue().equals("external_sayt")); + assertTrue("Expected search_as_you_type sub-field captured with external value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatPrefixFieldMapperThrows() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject())); + PrefixFieldMapper prefixMapper = getPrefixFieldMapper(mapper, "field._index_prefix"); + expectThrows(UnsupportedOperationException.class, () -> prefixMapper.parseCreateFieldForPluggableFormat(null)); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatShingleFieldMapperThrows() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject())); + ShingleFieldMapper shingleMapper = getShingleFieldMapper(mapper, "field._2gram"); + expectThrows(UnsupportedOperationException.class, () -> shingleMapper.parseCreateFieldForPluggableFormat(null)); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: search_as_you_type value + { + DocumentMapper luceneMapper = createDocumentMapper( + mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject()) + ); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.field("field", "hello world"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(b -> b.field("field", "hello world")), docInput); + + assertTrue("Lucene path should produce field 'field'", luceneFields.length > 0); + assertEquals("hello world", luceneFields[0].stringValue()); + boolean pluggableFound = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("hello world")); + assertTrue("Pluggable path should capture field 'field' with value 'hello world'", pluggableFound); + } + + // Scenario 2: null value — no field produced + { + DocumentMapper luceneMapper = createDocumentMapper( 
+ mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject()) + ); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.nullField("field"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "search_as_you_type").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(b -> b.nullField("field")), docInput); + + assertEquals("Lucene path should produce no field 'field'", 0, luceneFields.length); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Pluggable path should produce no field 'field'", pluggableHasField); + } + } } diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/TokenCountFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/TokenCountFieldMapperTests.java index dd0f7485c6e4f..3ad2681aa0695 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/TokenCountFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/TokenCountFieldMapperTests.java @@ -42,6 +42,8 @@ import org.apache.lucene.tests.analysis.CannedTokenStream; import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.tests.analysis.Token; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.AnalyzerScope; @@ -219,4 +221,81 @@ private SourceToParse createDocument(String fieldValue) throws Exception { private ParseContext.Document parseDocument(DocumentMapper mapper, SourceToParse request) { return mapper.parse(request).docs().stream().findFirst().orElseThrow(() -> new IllegalStateException("Test object not parsed")); } + + private DocumentMapper createIndexWithTokenCountFieldPluggableDataFormat() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + return createDocumentMapper(pluggableSettings, mapping(b -> { + b.startObject("test"); + { + b.field("type", "text"); + b.startObject("fields"); + { + b.startObject("tc"); + { + b.field("type", "token_count"); + b.field("analyzer", "standard"); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + })); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatTokenCountValue() throws Exception { + DocumentMapper mapper = createIndexWithTokenCountFieldPluggableDataFormat(); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(createDocument("three tokens string"), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("test.tc") && e.getValue().equals(3)); + assertTrue("Expected token count of 3 for field test.tc", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatTokenCountNullSkipped() throws Exception { + DocumentMapper mapper = createIndexWithTokenCountFieldPluggableDataFormat(); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(createDocument(null), docInput); + + boolean hasTokenCountField = 
docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("test.tc")); + assertFalse("Expected no token count field for null value", hasTokenCountField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws Exception { + // Scenario 1: token count value + { + DocumentMapper luceneMapper = createIndexWithTokenCountField(false); + ParseContext.Document luceneDoc = parseDocument(luceneMapper, createDocument("three tokens string")); + IndexableField luceneField = luceneDoc.getField("test.tc"); + + DocumentMapper pluggableMapper = createIndexWithTokenCountFieldPluggableDataFormat(); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(createDocument("three tokens string"), docInput); + + assertNotNull("Lucene path should produce field 'test.tc'", luceneField); + assertEquals(3, luceneField.numericValue()); + boolean pluggableFound = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("test.tc") && e.getValue().equals(3)); + assertTrue("Pluggable path should capture field 'test.tc' with value 3", pluggableFound); + } + + // Scenario 2: null value — no field produced + { + DocumentMapper luceneMapper = createIndexWithTokenCountField(false); + ParseContext.Document luceneDoc = parseDocument(luceneMapper, createDocument(null)); + IndexableField luceneField = luceneDoc.getField("test.tc"); + + DocumentMapper pluggableMapper = createIndexWithTokenCountFieldPluggableDataFormat(); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(createDocument(null), docInput); + + assertNull("Lucene path should produce no field 'test.tc'", luceneField); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("test.tc")); + assertFalse("Pluggable path should produce no field 'test.tc'", pluggableHasField); + } + } } diff --git a/modules/parent-join/src/main/java/org/opensearch/join/mapper/MetaJoinFieldMapper.java b/modules/parent-join/src/main/java/org/opensearch/join/mapper/MetaJoinFieldMapper.java index 2b0d3e4764b2a..8232738c5c38f 100644 --- a/modules/parent-join/src/main/java/org/opensearch/join/mapper/MetaJoinFieldMapper.java +++ b/modules/parent-join/src/main/java/org/opensearch/join/mapper/MetaJoinFieldMapper.java @@ -155,6 +155,11 @@ protected void parseCreateField(ParseContext context) throws IOException { throw new IllegalStateException("Should never be called"); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + throw new IllegalStateException("Should never be called"); + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentIdFieldMapper.java b/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentIdFieldMapper.java index 29a02a5bc8032..d05c92e60f884 100644 --- a/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentIdFieldMapper.java +++ b/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentIdFieldMapper.java @@ -208,6 +208,16 @@ protected void parseCreateField(ParseContext context) throws IOException { context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue)); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + if (context.externalValueSet() == false) { + throw new 
IllegalStateException("external value not set"); + } + String refId = (String) context.externalValue(); + BytesRef binaryValue = new BytesRef(refId); + context.documentInput().addField(fieldType(), binaryValue); + } + @Override protected void mergeOptions(FieldMapper other, List<String> conflicts) { ParentIdFieldMapper parentMergeWith = (ParentIdFieldMapper) other; diff --git a/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentJoinFieldMapper.java b/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentJoinFieldMapper.java index a229d050f3b1d..a1b688e1271e6 100644 --- a/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentJoinFieldMapper.java +++ b/modules/parent-join/src/main/java/org/opensearch/join/mapper/ParentJoinFieldMapper.java @@ -371,6 +371,11 @@ protected void parseCreateField(ParseContext context) throws IOException { throw new UnsupportedOperationException("parsing is implemented in parse(), this method should NEVER be called"); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + throw new UnsupportedOperationException("parsing is implemented in parse(), this method should NEVER be called"); + } + @Override public void parse(ParseContext context) throws IOException { context.path().add(simpleName()); @@ -430,9 +435,13 @@ public void parse(ParseContext context) throws IOException { } BytesRef binaryValue = new BytesRef(name); - Field field = new Field(fieldType().name(), binaryValue, fieldType); - context.doc().add(field); - context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue)); + if (isPluggableDataFormatFeatureEnabled(context)) { + context.documentInput().addField(fieldType(), binaryValue); + } else { + context.doc().add(new Field(fieldType().name(), binaryValue, fieldType)); + context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue)); + } + context.path().remove(); } diff --git a/modules/parent-join/src/test/java/org/opensearch/join/mapper/ParentJoinFieldMapperTests.java b/modules/parent-join/src/test/java/org/opensearch/join/mapper/ParentJoinFieldMapperTests.java index ed6a8259d6e90..765236ef5a2d4 100644 --- a/modules/parent-join/src/test/java/org/opensearch/join/mapper/ParentJoinFieldMapperTests.java +++ b/modules/parent-join/src/test/java/org/opensearch/join/mapper/ParentJoinFieldMapperTests.java @@ -33,11 +33,15 @@ package org.opensearch.join.mapper; import org.opensearch.common.compress.CompressedXContent; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.index.IndexService; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperException; import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.index.mapper.MapperService; @@ -47,8 +51,11 @@ import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchSingleNodeTestCase; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.containsString; @@ -654,4 +661,167 @@ public void testEagerGlobalOrdinals() throws Exception { 
assertNotNull(service.mapperService().fieldType("join_field#child")); assertFalse(service.mapperService().fieldType("join_field#child").eagerGlobalOrdinals()); } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableFormatParentDoc() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("join_field") + .field("type", "join") + .startObject("relations") + .field("parent", "child") + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + Settings settings = Settings.builder().put("index.pluggable.dataformat.enabled", true).build(); + IndexService service = createIndex("test", settings); + DocumentMapper docMapper = service.mapperService() + .merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + TestDocumentInput docInput = new TestDocumentInput(); + docMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("join_field", "parent").endObject()), + MediaTypeRegistry.JSON + ), + docInput + ); + + // ParentJoinFieldMapper writes the join name ("parent") via documentInput + assertTrue( + "Expected join_field captured with value containing 'parent'", + docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("join_field")) + ); + // ParentIdFieldMapper writes the doc id via documentInput (parseCreateFieldForPluggableFormat) + assertTrue( + "Expected join_field#parent captured via ParentIdFieldMapper", + docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("join_field#parent")) + ); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableFormatChildDoc() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("join_field") + .field("type", "join") + .startObject("relations") + .field("parent", "child") + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + Settings settings = Settings.builder().put("index.pluggable.dataformat.enabled", true).build(); + IndexService service = createIndex("test", settings); + DocumentMapper docMapper = service.mapperService() + .merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + TestDocumentInput docInput = new TestDocumentInput(); + docMapper.parse( + new SourceToParse( + "test", + "2", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .startObject("join_field") + .field("name", "child") + .field("parent", "1") + .endObject() + .endObject() + ), + MediaTypeRegistry.JSON, + "1" + ), + docInput + ); + + // ParentJoinFieldMapper writes the join name ("child") via documentInput + assertTrue( + "Expected join_field captured with value containing 'child'", + docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("join_field")) + ); + // ParentIdFieldMapper writes the parent ref via documentInput (parseCreateFieldForPluggableFormat) + assertTrue( + "Expected join_field#parent captured via ParentIdFieldMapper", + docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("join_field#parent")) + ); + } + + /** + * Simple DocumentInput that captures addField calls for assertion. 
+ */ + private static class TestDocumentInput implements DocumentInput { + private final List<Map.Entry<MappedFieldType, Object>> capturedFields = new ArrayList<>(); + + @Override + public Object getFinalInput() { + return null; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + capturedFields.add(Map.entry(fieldType, value)); + } + + @Override + public void setRowId(String rowIdFieldName, long rowId) {} + + @Override + public void close() {} + + public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() { + return capturedFields; + } + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableFormatParentJoinFieldMapperDirectThrows() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("join_field") + .field("type", "join") + .startObject("relations") + .field("parent", "child") + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + IndexService service = createIndex("test_direct"); + DocumentMapper docMapper = service.mapperService() + .merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + ParentJoinFieldMapper joinMapper = (ParentJoinFieldMapper) docMapper.mappers().getMapper("join_field"); + expectThrows(UnsupportedOperationException.class, () -> joinMapper.parseCreateFieldForPluggableFormat(null)); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableFormatMetaJoinFieldMapperThrows() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("join_field") + .field("type", "join") + .startObject("relations") + .field("parent", "child") + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + IndexService service = createIndex("test_meta"); + DocumentMapper docMapper = service.mapperService() + .merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + MetaJoinFieldMapper metaMapper = (MetaJoinFieldMapper) docMapper.mappers().getMapper("_parent_join"); + assertNotNull(metaMapper); + expectThrows(IllegalStateException.class, () -> metaMapper.parseCreateFieldForPluggableFormat(null)); + } } diff --git a/modules/percolator/src/main/java/org/opensearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/opensearch/percolator/PercolatorFieldMapper.java index 05ea90eb6ce6b..be094de265701 100644 --- a/modules/percolator/src/main/java/org/opensearch/percolator/PercolatorFieldMapper.java +++ b/modules/percolator/src/main/java/org/opensearch/percolator/PercolatorFieldMapper.java @@ -513,6 +513,11 @@ protected void parseCreateField(ParseContext context) { throw new UnsupportedOperationException("should not be invoked"); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) { + throw new UnsupportedOperationException("should not be invoked"); + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/modules/percolator/src/test/java/org/opensearch/percolator/PercolatorFieldMapperTests.java b/modules/percolator/src/test/java/org/opensearch/percolator/PercolatorFieldMapperTests.java index 3a5add974c439..31107d432b67f 100644 --- a/modules/percolator/src/test/java/org/opensearch/percolator/PercolatorFieldMapperTests.java +++ b/modules/percolator/src/test/java/org/opensearch/percolator/PercolatorFieldMapperTests.java @@ -1172,4 +1172,17 @@ public String pluginScriptLang() { } } + public void 
testParseCreateFieldForPluggableFormat() throws Exception { + addQueryFieldMappings(); + DocumentMapper documentMapper = mapperService.documentMapper(); + PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName); + + // parseCreateFieldForPluggableFormat should throw since PercolatorFieldMapper overrides parse() directly + UnsupportedOperationException exception = expectThrows( + UnsupportedOperationException.class, + () -> fieldMapper.parseCreateFieldForPluggableFormat(null) + ); + assertThat(exception.getMessage(), containsString("should not be invoked")); + } + } diff --git a/modules/percolator/src/test/java/org/opensearch/percolator/QueryBuilderStoreTests.java b/modules/percolator/src/test/java/org/opensearch/percolator/QueryBuilderStoreTests.java index 9a708a823507f..28e38ae5e3e6e 100644 --- a/modules/percolator/src/test/java/org/opensearch/percolator/QueryBuilderStoreTests.java +++ b/modules/percolator/src/test/java/org/opensearch/percolator/QueryBuilderStoreTests.java @@ -100,6 +100,12 @@ public void testStoringQueryBuilders() throws IOException { ParseContext parseContext = mock(ParseContext.class); ParseContext.Document document = new ParseContext.Document(); when(parseContext.doc()).thenReturn(document); + when(parseContext.indexSettings()).thenReturn( + new org.opensearch.index.IndexSettings( + IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build(), + settings + ) + ); PercolatorFieldMapper.createQueryBuilderField(version, fieldMapper, queryBuilders[i], parseContext); indexWriter.addDocument(document); } diff --git a/plugins/analysis-icu/src/main/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapper.java b/plugins/analysis-icu/src/main/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapper.java index 4408ef51c3a19..82a97099a8034 100644 --- a/plugins/analysis-icu/src/main/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapper.java +++ b/plugins/analysis-icu/src/main/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapper.java @@ -793,6 +793,33 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, @Override protected void parseCreateField(ParseContext context) throws IOException { + final BytesRef binaryValue = parseCollationKey(context); + if (binaryValue == null) { + return; + } + + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new Field(mappedFieldType.name(), binaryValue, fieldType); + context.doc().add(field); + } + + if (fieldType().hasDocValues()) { + context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } else if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + createFieldNamesField(context); + } + } + + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final BytesRef binaryValue = parseCollationKey(context); + if (binaryValue == null) { + return; + } + context.documentInput().addField(fieldType(), binaryValue); + } + + private BytesRef parseCollationKey(ParseContext context) throws IOException { final String value; if (context.externalValueSet()) { value = context.externalValue().toString(); @@ -806,22 +833,11 @@ protected void parseCreateField(ParseContext context) throws IOException { } if (value == null || value.length() > ignoreAbove) { - return; + return null; } RawCollationKey key = collator.getRawCollationKey(value, null); - final BytesRef binaryValue = new 
BytesRef(key.bytes, 0, key.size); - - if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - Field field = new Field(mappedFieldType.name(), binaryValue, fieldType); - context.doc().add(field); - } - - if (fieldType().hasDocValues()) { - context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); - } else if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - createFieldNamesField(context); - } + return new BytesRef(key.bytes, 0, key.size); } } diff --git a/plugins/analysis-icu/src/test/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapperTests.java b/plugins/analysis-icu/src/test/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapperTests.java index 0a2f48f4215cb..0e577d6181c6e 100644 --- a/plugins/analysis-icu/src/test/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapperTests.java +++ b/plugins/analysis-icu/src/test/java/org/opensearch/index/mapper/ICUCollationKeywordFieldMapperTests.java @@ -39,14 +39,19 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.plugin.analysis.icu.AnalysisICUPlugin; import org.opensearch.plugins.Plugin; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Set; import static org.hamcrest.Matchers.containsString; @@ -308,4 +313,84 @@ public void testUpdateIgnoreAbove() throws IOException { assertEquals(0, fields.length); } + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatCollationKeyword() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.field("field", "test_value")), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Expected ICU collation keyword field captured", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatCollationKeywordNullSkipped() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: collation keyword value + { + DocumentMapper 
luceneMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + pluggableMapper.parse(source(b -> b.field("field", "1234")), docInput); + + assertTrue("Lucene path should produce field 'field'", luceneFields.length > 0); + boolean pluggableFound = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Pluggable path should capture field 'field'", pluggableFound); + } + + // Scenario 2: null value — no field produced + { + DocumentMapper luceneMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.nullField("field"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + pluggableMapper.parse(source(b -> b.nullField("field")), docInput); + + assertEquals("Lucene path should produce no field 'field'", 0, luceneFields.length); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Pluggable path should produce no field 'field'", pluggableHasField); + } + } + + private static class TestDocumentInput implements DocumentInput { + private final List<Map.Entry<MappedFieldType, Object>> capturedFields = new ArrayList<>(); + + @Override + public Object getFinalInput() { + return null; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + capturedFields.add(Map.entry(fieldType, value)); + } + + @Override + public void setRowId(String rowIdFieldName, long rowId) {} + + @Override + public void close() {} + + public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() { + return capturedFields; + } + } } diff --git a/plugins/arrow-flight-rpc/src/main/java/org/opensearch/arrow/flight/transport/FlightOutboundHandler.java b/plugins/arrow-flight-rpc/src/main/java/org/opensearch/arrow/flight/transport/FlightOutboundHandler.java index a4a76d518bad2..03da4ff3a14b5 100644 --- a/plugins/arrow-flight-rpc/src/main/java/org/opensearch/arrow/flight/transport/FlightOutboundHandler.java +++ b/plugins/arrow-flight-rpc/src/main/java/org/opensearch/arrow/flight/transport/FlightOutboundHandler.java @@ -20,7 +20,6 @@ import org.opensearch.Version; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.io.stream.BytesStreamOutput; -import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.transport.TransportResponse; import org.opensearch.threadpool.ThreadPool; @@ -115,7 +114,6 @@ public void sendResponseBatch( final boolean compress, final boolean isHandshake ) throws IOException { - ThreadContext.StoredContext storedContext = threadPool.getThreadContext().stashContext(); BatchTask task = new BatchTask( nodeVersion, features, @@ -128,8 +126,7 @@ isHandshake, false, false, - null, - storedContext + null ); if (!(channel instanceof FlightServerChannel flightChannel)) { @@ -137,17 +134,16 @@ return; } - flightChannel.getExecutor().execute(() -> { + 
flightChannel.getExecutor().execute(threadPool.getThreadContext().preserveContext(() -> { try (BatchTask ignored = task) { processBatchTask(task); } catch (Exception e) { messageListener.onResponseSent(requestId, action, e); } - }); + })); } private void processBatchTask(BatchTask task) { - task.storedContext().restore(); if (!(task.channel() instanceof FlightServerChannel flightChannel)) { Exception error = new IllegalStateException("Expected FlightServerChannel, got " + task.channel().getClass().getName()); messageListener.onResponseSent(task.requestId(), task.action(), error); @@ -175,7 +171,6 @@ public void completeStream( final long requestId, final String action ) { - ThreadContext.StoredContext storedContext = threadPool.getThreadContext().stashContext(); BatchTask completeTask = new BatchTask( nodeVersion, features, @@ -188,8 +183,7 @@ false, true, false, - null, - storedContext + null ); if (!(channel instanceof FlightServerChannel flightChannel)) { @@ -197,17 +191,16 @@ return; } - flightChannel.getExecutor().execute(() -> { + flightChannel.getExecutor().execute(threadPool.getThreadContext().preserveContext(() -> { try (BatchTask ignored = completeTask) { processCompleteTask(completeTask); } catch (Exception e) { messageListener.onResponseSent(requestId, action, e); } - }); + })); } private void processCompleteTask(BatchTask task) { - task.storedContext().restore(); if (!(task.channel() instanceof FlightServerChannel flightChannel)) { Exception error = new IllegalStateException("Expected FlightServerChannel, got " + task.channel().getClass().getName()); messageListener.onResponseSent(task.requestId(), task.action(), error); @@ -231,7 +224,6 @@ public void sendErrorResponse( final String action, final Exception error ) { - ThreadContext.StoredContext storedContext = threadPool.getThreadContext().stashContext(); BatchTask errorTask = new BatchTask( nodeVersion, features, @@ -244,8 +236,7 @@ false, false, true, - error, - storedContext + error ); if (!(channel instanceof FlightServerChannel flightChannel)) { @@ -253,17 +244,16 @@ return; } - flightChannel.getExecutor().execute(() -> { + flightChannel.getExecutor().execute(threadPool.getThreadContext().preserveContext(() -> { try (BatchTask ignored = errorTask) { processErrorTask(errorTask); } catch (Exception e) { messageListener.onResponseSent(requestId, action, e); } - }); + })); } private void processErrorTask(BatchTask task) { - task.storedContext().restore(); if (!(task.channel() instanceof FlightServerChannel flightServerChannel)) { Exception error = new IllegalStateException("Expected FlightServerChannel, got " + task.channel().getClass().getName()); messageListener.onResponseSent(task.requestId(), task.action(), error); @@ -311,13 +301,10 @@ private ByteBuffer getHeaderBuffer(long requestId, Version nodeVersion, Set<String> features) private record BatchTask(Version nodeVersion, Set<String> features, TcpChannel channel, FlightTransportChannel transportChannel, long requestId, String action, TransportResponse response, boolean compress, boolean isHandshake, boolean isComplete, boolean isError, - Exception error, ThreadContext.StoredContext storedContext) implements AutoCloseable { + Exception error) implements AutoCloseable { @Override public void close() { - if (storedContext != null) { - storedContext.close(); - } if ((isComplete || isError) && transportChannel != null) { transportChannel.releaseChannel(isError); }
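The change above replaces the stash/restore plumbing (a StoredContext carried inside each BatchTask) with ThreadContext.preserveContext(...), which wraps the runnable so the caller's headers are visible on the Flight executor thread while the caller's own thread context is left intact. A minimal, self-contained sketch of that pattern (illustrative only, not part of this change; it assumes nothing beyond the ThreadContext API already used above):

    import org.opensearch.common.settings.Settings;
    import org.opensearch.common.util.concurrent.ThreadContext;

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    public class PreserveContextSketch {
        public static void main(String[] args) throws InterruptedException {
            ThreadContext threadContext = new ThreadContext(Settings.EMPTY);
            ExecutorService executor = Executors.newSingleThreadExecutor();

            threadContext.putHeader("trace-id", "abc-123");

            // preserveContext captures the caller's context and restores it around
            // the task when it later runs on the executor thread.
            executor.execute(threadContext.preserveContext(
                () -> System.out.println("on executor: " + threadContext.getHeader("trace-id"))
            ));

            // Unlike stashContext(), the caller's context is not cleared as a side
            // effect, which is exactly what the new tests below assert.
            System.out.println("on caller: " + threadContext.getHeader("trace-id"));

            executor.shutdown();
            executor.awaitTermination(5, TimeUnit.SECONDS);
        }
    }
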
diff --git a/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightOutboundHandlerContextPropagationTests.java b/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightOutboundHandlerContextPropagationTests.java new file mode 100644 index 0000000000000..676add5196cd8 --- /dev/null +++ b/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightOutboundHandlerContextPropagationTests.java @@ -0,0 +1,258 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.arrow.flight.transport; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.transport.TransportResponse; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.StreamTransportResponseHandler; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportMessageListener; +import org.opensearch.transport.TransportRequestOptions; +import org.opensearch.transport.stream.StreamTransportResponse; + +import java.io.IOException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Tests that thread context headers are properly propagated through the Flight transport layer + * when sending response batches, completing streams, and sending errors. + */ +public class FlightOutboundHandlerContextPropagationTests extends FlightTransportTestBase { + + private static final int TIMEOUT_SEC = 10; + private static final String CONTEXT_HEADER = "test-context-header"; + private static final String CONTEXT_VALUE = "propagated-value"; + + public void testThreadContextPropagatedThroughStreamResponseBatch() throws InterruptedException { + String action = "internal:test/context-propagation"; + CountDownLatch handlerLatch = new CountDownLatch(1); + AtomicInteger responseCount = new AtomicInteger(0); + AtomicReference<Exception> handlerException = new AtomicReference<>(); + AtomicReference<String> capturedHeaderOnServer = new AtomicReference<>(); + + streamTransportService.registerRequestHandler(action, ThreadPool.Names.SAME, TestRequest::new, (request, channel, task) -> { + try { + // Set a header in the request handler's thread context + threadPool.getThreadContext().putHeader(CONTEXT_HEADER, CONTEXT_VALUE); + + // Verify context is set before sending batch + assertEquals(CONTEXT_VALUE, threadPool.getThreadContext().getHeader(CONTEXT_HEADER)); + + channel.sendResponseBatch(new TestResponse("Response 1")); + + // Verify the caller's context is preserved after sendResponseBatch + capturedHeaderOnServer.set(threadPool.getThreadContext().getHeader(CONTEXT_HEADER)); + + channel.sendResponseBatch(new TestResponse("Response 2")); + + // Verify context is still preserved after second batch + assertEquals(CONTEXT_VALUE, threadPool.getThreadContext().getHeader(CONTEXT_HEADER)); + + channel.completeStream(); + + // Verify context is still preserved after completeStream + assertEquals(CONTEXT_VALUE, threadPool.getThreadContext().getHeader(CONTEXT_HEADER)); + } catch (Exception e) { + try { + channel.sendResponse(e); + } catch (IOException ignored) {} + } + }); + + TestRequest testRequest = new TestRequest(); + TransportRequestOptions options = 
TransportRequestOptions.builder().withType(TransportRequestOptions.Type.STREAM).build(); + + StreamTransportResponseHandler<TestResponse> responseHandler = new StreamTransportResponseHandler<TestResponse>() { + @Override + public void handleStreamResponse(StreamTransportResponse<TestResponse> streamResponse) { + try (streamResponse) { + try { + while (streamResponse.nextResponse() != null) { + responseCount.incrementAndGet(); + } + } catch (Exception e) { + handlerException.set(e); + } + } catch (Exception ignored) {} finally { + handlerLatch.countDown(); + } + } + + @Override + public void handleException(TransportException exp) { + handlerException.set(exp); + handlerLatch.countDown(); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public TestResponse read(StreamInput in) throws IOException { + return new TestResponse(in); + } + }; + + streamTransportService.sendRequest(remoteNode, action, testRequest, options, responseHandler); + + assertTrue(handlerLatch.await(TIMEOUT_SEC, TimeUnit.SECONDS)); + assertEquals(2, responseCount.get()); + assertNull("No exception expected but got: " + handlerException.get(), handlerException.get()); + assertEquals( + "Thread context header should be preserved on the server handler thread after sendResponseBatch", + CONTEXT_VALUE, + capturedHeaderOnServer.get() + ); + } + + public void testThreadContextPropagatedThroughErrorResponse() throws InterruptedException { + String action = "internal:test/context-error-propagation"; + CountDownLatch handlerLatch = new CountDownLatch(1); + AtomicReference<Exception> handlerException = new AtomicReference<>(); + AtomicReference<String> capturedHeaderOnServer = new AtomicReference<>(); + + streamTransportService.registerRequestHandler(action, ThreadPool.Names.SAME, TestRequest::new, (request, channel, task) -> { + try { + // Set a header in the request handler's thread context + threadPool.getThreadContext().putHeader(CONTEXT_HEADER, CONTEXT_VALUE); + + // Send an error + channel.sendResponse(new RuntimeException("Intentional test error")); + + // Verify the caller's context is preserved after sendErrorResponse + capturedHeaderOnServer.set(threadPool.getThreadContext().getHeader(CONTEXT_HEADER)); + } catch (IOException ignored) {} + }); + + TestRequest testRequest = new TestRequest(); + TransportRequestOptions options = TransportRequestOptions.builder().withType(TransportRequestOptions.Type.STREAM).build(); + + StreamTransportResponseHandler<TestResponse> responseHandler = new StreamTransportResponseHandler<TestResponse>() { + @Override + public void handleStreamResponse(StreamTransportResponse<TestResponse> streamResponse) { + try (streamResponse) { + try { + while (streamResponse.nextResponse() != null) { + } + } catch (Exception e) { + handlerException.set(e); + } + } catch (Exception ignored) {} finally { + handlerLatch.countDown(); + } + } + + @Override + public void handleException(TransportException exp) { + handlerException.set(exp); + handlerLatch.countDown(); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public TestResponse read(StreamInput in) throws IOException { + return new TestResponse(in); + } + }; + + streamTransportService.sendRequest(remoteNode, action, testRequest, options, responseHandler); + + assertTrue(handlerLatch.await(TIMEOUT_SEC, TimeUnit.SECONDS)); + assertNotNull(handlerException.get()); + assertEquals( + "Thread context header should be preserved on the server handler thread after sendErrorResponse", + CONTEXT_VALUE, + capturedHeaderOnServer.get() + ); + } + 
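+    // All handlers in this test class drain the stream the same way: nextResponse()
+    // is polled until it returns null (stream complete) and try-with-resources closes
+    // the StreamTransportResponse. A sketch of the minimal consumption loop, using the
+    // same TestResponse type as the rest of this file:
+    //
+    //   try (StreamTransportResponse<TestResponse> stream = streamResponse) {
+    //       TestResponse response;
+    //       while ((response = stream.nextResponse()) != null) {
+    //           // consume response
+    //       }
+    //   }
+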
+ public void testContextHeaderPropagatedToResponseHeaders() throws InterruptedException { + String action = "internal:test/context-header-in-response"; + CountDownLatch handlerLatch = new CountDownLatch(1); + AtomicInteger responseCount = new AtomicInteger(0); + AtomicReference<Exception> handlerException = new AtomicReference<>(); + AtomicInteger messageSentCount = new AtomicInteger(0); + + TransportMessageListener testListener = new TransportMessageListener() { + @Override + public void onResponseSent(long requestId, String action, TransportResponse response) { + messageSentCount.incrementAndGet(); + } + + }; + + flightTransport.setMessageListener(testListener); + + streamTransportService.registerRequestHandler(action, ThreadPool.Names.SAME, TestRequest::new, (request, channel, task) -> { + try { + channel.sendResponseBatch(new TestResponse("batch-1")); + channel.sendResponseBatch(new TestResponse("batch-2")); + channel.completeStream(); + } catch (Exception e) { + try { + channel.sendResponse(e); + } catch (IOException ioException) {} + } + }); + + TestRequest testRequest = new TestRequest(); + TransportRequestOptions options = TransportRequestOptions.builder().withType(TransportRequestOptions.Type.STREAM).build(); + + StreamTransportResponseHandler<TestResponse> responseHandler = new StreamTransportResponseHandler<TestResponse>() { + @Override + public void handleStreamResponse(StreamTransportResponse<TestResponse> streamResponse) { + try (streamResponse) { + try { + while (streamResponse.nextResponse() != null) { + responseCount.incrementAndGet(); + } + } catch (Exception e) { + handlerException.set(e); + } + } catch (Exception ignored) {} finally { + handlerLatch.countDown(); + } + } + + @Override + public void handleException(TransportException exp) { + handlerException.set(exp); + handlerLatch.countDown(); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public TestResponse read(StreamInput in) throws IOException { + return new TestResponse(in); + } + }; + + streamTransportService.sendRequest(remoteNode, action, testRequest, options, responseHandler); + + assertTrue(handlerLatch.await(TIMEOUT_SEC, TimeUnit.SECONDS)); + assertEquals(2, responseCount.get()); + assertNull(handlerException.get()); + // 2 batches + 1 completeStream = 3 message sent events + assertEquals(3, messageSentCount.get()); + } +} diff --git a/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightOutboundHandlerTests.java b/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightOutboundHandlerTests.java new file mode 100644 index 0000000000000..1c1aeb3fda385 --- /dev/null +++ b/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightOutboundHandlerTests.java @@ -0,0 +1,206 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.arrow.flight.transport; + +import org.opensearch.Version; +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.core.transport.TransportResponse; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.StatsTracker; +import org.opensearch.transport.TransportMessageListener; +import org.junit.After; +import org.junit.Before; + +import java.util.Collections; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class FlightOutboundHandlerTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + private FlightOutboundHandler handler; + private ExecutorService executor; + private FlightServerChannel mockFlightChannel; + private TransportMessageListener mockListener; + + private static final String HEADER_KEY = "test-header"; + private static final String HEADER_VALUE = "test-value"; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool(getTestName()); + executor = Executors.newSingleThreadExecutor(); + handler = new FlightOutboundHandler("test-node", Version.CURRENT, new String[0], new StatsTracker(), threadPool); + + mockFlightChannel = mock(FlightServerChannel.class); + when(mockFlightChannel.getExecutor()).thenReturn(executor); + + mockListener = mock(TransportMessageListener.class); + handler.setMessageListener(mockListener); + } + + @After + @Override + public void tearDown() throws Exception { + executor.shutdown(); + assertTrue(executor.awaitTermination(5, TimeUnit.SECONDS)); + threadPool.shutdown(); + super.tearDown(); + } + + public void testSendResponseBatchPreservesCallerThreadContext() throws Exception { + ThreadContext threadContext = threadPool.getThreadContext(); + threadContext.putHeader(HEADER_KEY, HEADER_VALUE); + + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocation -> { + latch.countDown(); + return null; + }).when(mockListener).onResponseSent(anyLong(), anyString(), any(TransportResponse.class)); + + handler.sendResponseBatch( + Version.CURRENT, + Collections.emptySet(), + mockFlightChannel, + mock(FlightTransportChannel.class), + 1L, + "test-action", + mock(TransportResponse.class), + false, + false + ); + + // Verify the caller's thread context is NOT cleared + assertEquals( + "Caller's thread context should be preserved after sendResponseBatch", + HEADER_VALUE, + threadContext.getHeader(HEADER_KEY) + ); + } + + public void testCompleteStreamPreservesCallerThreadContext() throws Exception { + ThreadContext threadContext = threadPool.getThreadContext(); + threadContext.putHeader(HEADER_KEY, HEADER_VALUE); + + handler.completeStream( + Version.CURRENT, + Collections.emptySet(), + mockFlightChannel, + mock(FlightTransportChannel.class), + 1L, + "test-action" + ); + + assertEquals("Caller's thread context should be preserved after completeStream", HEADER_VALUE, threadContext.getHeader(HEADER_KEY)); + } + + public void testSendErrorResponsePreservesCallerThreadContext() throws 
Exception { + ThreadContext threadContext = threadPool.getThreadContext(); + threadContext.putHeader(HEADER_KEY, HEADER_VALUE); + + handler.sendErrorResponse( + Version.CURRENT, + Collections.emptySet(), + mockFlightChannel, + mock(FlightTransportChannel.class), + 1L, + "test-action", + new RuntimeException("test error") + ); + + assertEquals( + "Caller's thread context should be preserved after sendErrorResponse", + HEADER_VALUE, + threadContext.getHeader(HEADER_KEY) + ); + } + + public void testSendResponseBatchPropagatesContextToExecutorThread() throws Exception { + ThreadContext threadContext = threadPool.getThreadContext(); + threadContext.putHeader(HEADER_KEY, HEADER_VALUE); + + CountDownLatch latch = new CountDownLatch(1); + + // Use a mock executor that runs the preserveContext-wrapped runnable + ExecutorService mockExecutor = mock(ExecutorService.class); + doAnswer(invocation -> { + Runnable command = invocation.getArgument(0); + executor.execute(() -> { + command.run(); + // After the preserveContext wrapper runs, capture the header + // The wrapper stashes the executor thread context, restores caller's, runs, then restores executor's + latch.countDown(); + }); + return null; + }).when(mockExecutor).execute(any(Runnable.class)); + when(mockFlightChannel.getExecutor()).thenReturn(mockExecutor); + + handler.sendResponseBatch( + Version.CURRENT, + Collections.emptySet(), + mockFlightChannel, + mock(FlightTransportChannel.class), + 1L, + "test-action", + mock(TransportResponse.class), + false, + false + ); + + assertTrue("Executor task should complete", latch.await(5, TimeUnit.SECONDS)); + } + + public void testMultipleBatchesMaintainCallerContext() throws Exception { + ThreadContext threadContext = threadPool.getThreadContext(); + threadContext.putHeader(HEADER_KEY, HEADER_VALUE); + + Set<String> features = Collections.emptySet(); + FlightTransportChannel mockTransportChannel = mock(FlightTransportChannel.class); + + // Send multiple batches + for (int i = 0; i < 3; i++) { + handler.sendResponseBatch( + Version.CURRENT, + features, + mockFlightChannel, + mockTransportChannel, + 1L, + "test-action", + mock(TransportResponse.class), + false, + false + ); + + assertEquals( + "Caller's thread context should be preserved after batch " + (i + 1), + HEADER_VALUE, + threadContext.getHeader(HEADER_KEY) + ); + } + + // Complete the stream + handler.completeStream(Version.CURRENT, features, mockFlightChannel, mockTransportChannel, 1L, "test-action"); + + assertEquals("Caller's thread context should be preserved after completeStream", HEADER_VALUE, threadContext.getHeader(HEADER_KEY)); + } +} diff --git a/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightTransportChannelTests.java b/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightTransportChannelTests.java index ffa5640579caa..7c01f0c9afe1e 100644 --- a/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightTransportChannelTests.java +++ b/plugins/arrow-flight-rpc/src/test/java/org/opensearch/arrow/flight/transport/FlightTransportChannelTests.java @@ -168,7 +168,6 @@ public void testCompleteStreamSuccess() { false, true, false, - null, null ); completeTask.close(); }
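The Kafka ingestion test added next drives pull-based ingestion by publishing raw topic messages. Each message is a small JSON envelope carrying the document id, the operation type, and the source, as the produceData(...) calls below show; a sketch of building one such message (illustrative only, mirroring the first message in the test):

    // Envelope consumed by the pull-based ingestion poller:
    // _id (document id), _op_type (e.g. "index") and _source (document body).
    String message = "{"
        + "\"_id\":\"1\","
        + "\"_op_type\":\"index\","
        + "\"_source\":{\"product\":\"product1\",\"attributes\":{\"title\":\"Screenshot 2026\"}}"
        + "}";

With "dynamic": "strict" in the index mapping and error_strategy set to drop, a message whose source contains an unmapped field fails parsing and is counted in totalFailuresDroppedCount instead of blocking the poller, which is the before/after behavior the test verifies around the PUT-mapping call.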
diff --git a/plugins/ingestion-kafka/src/internalClusterTest/java/org/opensearch/plugin/kafka/IngestFromKafkaIT.java b/plugins/ingestion-kafka/src/internalClusterTest/java/org/opensearch/plugin/kafka/IngestFromKafkaIT.java index ef74639e8bb50..57f7fcb6b4d0d 100644 --- a/plugins/ingestion-kafka/src/internalClusterTest/java/org/opensearch/plugin/kafka/IngestFromKafkaIT.java +++ b/plugins/ingestion-kafka/src/internalClusterTest/java/org/opensearch/plugin/kafka/IngestFromKafkaIT.java @@ -1385,6 +1385,110 @@ public void testWarmupPhase() throws Exception { waitForSearchableDocs(10, List.of(nodeA)); } + public void testKafkaIngestionWithMappingUpdate() throws Exception { + final String nodeA = internalCluster().startNode(); + + createIndex( + indexName, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("ingestion_source.type", "kafka") + .put("ingestion_source.pointer.init.reset", "earliest") + .put("ingestion_source.param.topic", topicName) + .put("ingestion_source.param.bootstrap_servers", kafka.getBootstrapServers()) + .put("index.replication.type", "SEGMENT") + .put("ingestion_source.error_strategy", "drop") + .build(), + "{\"dynamic\":\"strict\",\"properties\":{\"product\":{\"type\": \"text\"}," + "\"attributes\":{\"properties\":{\"title\":{\"type\": \"keyword\"}}}}}" + ); + ensureGreen(indexName); + + // Step 1: Publish doc with known fields and wait for it to be searchable + produceData( + "{\"_id\":\"1\", \"_op_type\":\"index\",\"_source\":" + "{\"product\":\"product1\", \"attributes\":{\"title\":\"Screenshot 2026\"}}}" + ); + + waitForSearchableDocs(1, List.of(nodeA)); + + // Step 2: Publish doc with unknown field - strict mapping should reject it + produceData( + "{\"_id\":\"2\", \"_op_type\":\"index\",\"_source\":" + "{\"product\":\"product2\", \"attributes\":{\"title\":\"Banner Ad\",\"is_promotional\":true}}}" + ); + + waitForState(() -> { + PollingIngestStats stats = client().admin().indices().prepareStats(indexName).get().getIndex(indexName).getShards()[0] + .getPollingIngestStats(); + return stats != null && stats.getMessageProcessorStats().totalFailuresDroppedCount() >= 1L; + }); + + PollingIngestStats statsBeforePutMapping = client().admin() + .indices() + .prepareStats(indexName) + .get() + .getIndex(indexName) + .getShards()[0].getPollingIngestStats(); + assertThat( + "message2 should fail due to strict mapping", + statsBeforePutMapping.getMessageProcessorStats().totalFailedCount(), + is(1L) + ); + assertThat("message2 should be dropped", statsBeforePutMapping.getMessageProcessorStats().totalFailuresDroppedCount(), is(1L)); + + // Step 3: Add is_promotional to the mapping via PUT mapping API + assertAcked( + client().admin() + .indices() + .preparePutMapping(indexName) + .setSource( + "{\"properties\":{\"attributes\":{\"properties\":" + "{\"is_promotional\":{\"type\":\"boolean\",\"doc_values\":false}}}}}", + org.opensearch.common.xcontent.XContentType.JSON + ) + ); + + waitForState(() -> { + Map<String, Object> mappings = client().admin() + .indices() + .prepareGetMappings(indexName) + .get() + .getMappings() + .get(indexName) + .getSourceAsMap(); + @SuppressWarnings("unchecked") + Map<String, Object> properties = (Map<String, Object>) mappings.get("properties"); + @SuppressWarnings("unchecked") + Map<String, Object> attrProps = (Map<String, Object>) ((Map<String, Object>) properties.get("attributes")).get("properties"); + return attrProps.containsKey("is_promotional"); + }); + + // Step 4: Publish doc with is_promotional - should now succeed after the mapping update + produceData( + "{\"_id\":\"3\", \"_op_type\":\"index\",\"_source\":" + "{\"product\":\"product3\", \"attributes\":{\"title\":\"Holiday Sale\",\"is_promotional\":false}}}" + ); + + waitForSearchableDocs(2, List.of(nodeA)); + + // Step 5: Query on the new boolean field to verify it 
was indexed correctly + waitForState(() -> { + refresh(indexName); + SearchResponse response = client().prepareSearch(indexName) + .setQuery(new TermQueryBuilder("attributes.is_promotional", false)) + .get(); + return response.getHits().getTotalHits().value() == 1L + && "product3".equals(response.getHits().getAt(0).getSourceAsMap().get("product")); + }); + + // Verify failedCount is still 1 (only message2 failed, message3 succeeded) + PollingIngestStats statsAfterPut = client().admin().indices().prepareStats(indexName).get().getIndex(indexName).getShards()[0] + .getPollingIngestStats(); + assertThat(statsAfterPut.getMessageProcessorStats().totalFailedCount(), is(1L)); + } + public void testDynamicWarmupSettingsUpdate() throws Exception { // Step 1: Publish messages before creating the index for (int i = 0; i < 5; i++) { diff --git a/plugins/mapper-annotated-text/src/internalClusterTest/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java b/plugins/mapper-annotated-text/src/internalClusterTest/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java index f8912d7760949..5c84488165dfb 100644 --- a/plugins/mapper-annotated-text/src/internalClusterTest/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java +++ b/plugins/mapper-annotated-text/src/internalClusterTest/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java @@ -47,6 +47,8 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; @@ -58,7 +60,9 @@ import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.analysis.StandardTokenizerFactory; import org.opensearch.index.analysis.TokenFilterFactory; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.MapperTestCase; @@ -68,11 +72,13 @@ import org.opensearch.plugins.Plugin; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -606,4 +612,108 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() { } } + private Settings pluggableSettings() { + return Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatAnnotatedText() throws IOException { + DocumentMapper mapper = createDocumentMapper(pluggableSettings(), fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.field("field", "some annotated text")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("some annotated text")); + assertTrue("Expected annotated_text field captured with value", found); + } + + 
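+    // Pattern used by the pluggable-format tests in this class: with
+    // index.pluggable.dataformat.enabled, parsed values are handed to the
+    // DocumentInput instead of being materialized as Lucene IndexableFields, so a
+    // capturing implementation (TestDocumentInput at the bottom of this class) can
+    // assert directly on the captured (MappedFieldType, value) pairs. Sketch:
+    //
+    //   TestDocumentInput input = new TestDocumentInput();
+    //   mapper.parse(source(b -> b.field("field", "some annotated text")), input);
+    //   input.getCapturedFields().forEach(e -> assertNotNull(e.getKey().name()));
+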
@LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValueSkipped() throws IOException { + DocumentMapper mapper = createDocumentMapper(pluggableSettings(), fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatWithExternalValue() throws IOException { + DocumentMapper mapper = createDocumentMapper(pluggableSettings(), mapping(b -> { + b.startObject("text_field"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("annotated").field("type", "annotated_text").endObject(); + b.endObject(); + b.endObject(); + })); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.field("text_field", "external_value")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("text_field.annotated") && e.getValue().equals("external_value")); + assertTrue("Expected annotated_text sub-field captured with external value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws IOException { + // Scenario 1: annotated text value + { + DocumentMapper luceneMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.field("field", "some annotated text"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings(), fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + pluggableMapper.parse(source(b -> b.field("field", "some annotated text")), docInput); + + assertTrue("Lucene path should produce field 'field'", luceneFields.length > 0); + assertEquals("some annotated text", luceneFields[0].stringValue()); + boolean pluggableFound = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("some annotated text")); + assertTrue("Pluggable path should capture field 'field' with value 'some annotated text'", pluggableFound); + } + + // Scenario 2: null value — no field produced + { + DocumentMapper luceneMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.nullField("field"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings(), fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + pluggableMapper.parse(source(b -> b.nullField("field")), docInput); + + assertEquals("Lucene path should produce no field 'field'", 0, luceneFields.length); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Pluggable path should produce no field 'field'", pluggableHasField); + } + } + + private static class TestDocumentInput implements DocumentInput { + private final List<Map.Entry<MappedFieldType, Object>> capturedFields = new ArrayList<>(); + + @Override + public Object 
getFinalInput() { + return null; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + capturedFields.add(Map.entry(fieldType, value)); + } + + @Override + public void setRowId(String rowIdFieldName, long rowId) {} + + @Override + public void close() {} + + public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() { + return capturedFields; + } + } } diff --git a/plugins/mapper-annotated-text/src/main/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 952cff96860f2..1e696b77a1a32 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/opensearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -575,13 +575,7 @@ protected AnnotatedTextFieldMapper clone() { @Override protected void parseCreateField(ParseContext context) throws IOException { - final String value; - if (context.externalValueSet()) { - value = context.externalValue().toString(); - } else { - value = context.parser().textOrNull(); - } - + final String value = getTextValue(context); if (value == null) { return; } @@ -595,6 +589,22 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final String value = getTextValue(context); + if (value == null) { + return; + } + context.documentInput().addField(fieldType(), value); + } + + private String getTextValue(ParseContext context) throws IOException { + if (context.externalValueSet()) { + return context.externalValue().toString(); + } + return context.parser().textOrNull(); + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/plugins/mapper-murmur3/src/main/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapper.java b/plugins/mapper-murmur3/src/main/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapper.java index 4e87b03132055..dc67310afface 100644 --- a/plugins/mapper-murmur3/src/main/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapper.java +++ b/plugins/mapper-murmur3/src/main/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapper.java @@ -162,4 +162,19 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final Object value; + if (context.externalValueSet()) { + value = context.externalValue(); + } else { + value = context.parser().textOrNull(); + } + if (value != null) { + final BytesRef bytes = new BytesRef(value.toString()); + final long hash = MurmurHash3.hash128(bytes.bytes, bytes.offset, bytes.length, 0, new MurmurHash3.Hash128()).h1; + context.documentInput().addField(fieldType(), hash); + } + } + } diff --git a/plugins/mapper-murmur3/src/test/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapperTests.java b/plugins/mapper-murmur3/src/test/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapperTests.java index 04d46db50592c..b9f160a310fb2 100644 --- a/plugins/mapper-murmur3/src/test/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapperTests.java +++ b/plugins/mapper-murmur3/src/test/java/org/opensearch/index/mapper/murmur3/Murmur3FieldMapperTests.java @@ -35,17 +35,25 @@ import org.apache.lucene.index.DocValuesType; import 
org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.hash.MurmurHash3; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperTestCase; import org.opensearch.index.mapper.ParsedDocument; import org.opensearch.plugin.mapper.MapperMurmur3Plugin; import org.opensearch.plugins.Plugin; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.Map; public class Murmur3FieldMapperTests extends MapperTestCase { @@ -80,4 +88,98 @@ public void testDefaults() throws Exception { assertEquals(DocValuesType.SORTED_NUMERIC, field.fieldType().docValuesType()); } + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatMurmur3() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.field("field", "test_value")), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Expected murmur3 field captured with hash value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatMurmur3NullSkipped() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: murmur3 value + { + DocumentMapper luceneMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.field("field", "test_value"))); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field"); + + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + TestDocumentInput docInput = new TestDocumentInput(); + pluggableMapper.parse(source(b -> b.field("field", "test_value")), docInput); + + assertTrue("Lucene path should produce field 'field'", luceneFields.length > 0); + boolean pluggableFound = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Pluggable path should capture field 'field'", pluggableFound); + } + + // Scenario 
2: null value — no field produced
+ {
+ DocumentMapper luceneMapper = createDocumentMapper(fieldMapping(this::minimalMapping));
+ ParsedDocument luceneDoc = luceneMapper.parse(source(b -> b.nullField("field")));
+ IndexableField[] luceneFields = luceneDoc.rootDoc().getFields("field");
+
+ DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping));
+ TestDocumentInput docInput = new TestDocumentInput();
+ pluggableMapper.parse(source(b -> b.nullField("field")), docInput);
+
+ assertEquals("Lucene path should produce no field 'field'", 0, luceneFields.length);
+ boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field"));
+ assertFalse("Pluggable path should produce no field 'field'", pluggableHasField);
+ }
+ }
+
+ public void testHashCalculation() throws Exception {
+ String testValue = "test_value";
+ BytesRef bytes = new BytesRef(testValue);
+ long hash = MurmurHash3.hash128(bytes.bytes, bytes.offset, bytes.length, 0, new MurmurHash3.Hash128()).h1;
+
+ // Verify hash is calculated (non-zero for non-empty input)
+ assertNotEquals("Hash should not be zero for non-empty input", 0L, hash);
+
+ // Verify consistent hashing
+ BytesRef bytes2 = new BytesRef(testValue);
+ long hash2 = MurmurHash3.hash128(bytes2.bytes, bytes2.offset, bytes2.length, 0, new MurmurHash3.Hash128()).h1;
+ assertEquals("Hash should be consistent for same input", hash, hash2);
+ }
+
+ private static class TestDocumentInput implements DocumentInput {
+ private final List<Map.Entry<MappedFieldType, Object>> capturedFields = new ArrayList<>();
+
+ @Override
+ public Object getFinalInput() {
+ return null;
+ }
+
+ @Override
+ public void addField(MappedFieldType fieldType, Object value) {
+ capturedFields.add(Map.entry(fieldType, value));
+ }
+
+ @Override
+ public void setRowId(String rowIdFieldName, long rowId) {}
+
+ @Override
+ public void close() {}
+
+ public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() {
+ return capturedFields;
+ }
+ }
 }
diff --git a/plugins/mapper-size/src/internalClusterTest/java/org/opensearch/index/mapper/size/SizeMappingTests.java b/plugins/mapper-size/src/internalClusterTest/java/org/opensearch/index/mapper/size/SizeMappingTests.java
index 49aab68be416b..4e477803eb24f 100644
--- a/plugins/mapper-size/src/internalClusterTest/java/org/opensearch/index/mapper/size/SizeMappingTests.java
+++ b/plugins/mapper-size/src/internalClusterTest/java/org/opensearch/index/mapper/size/SizeMappingTests.java
@@ -35,11 +35,14 @@
 import org.apache.lucene.index.IndexableField;
 import org.opensearch.common.compress.CompressedXContent;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.core.common.bytes.BytesReference;
 import org.opensearch.core.xcontent.MediaTypeRegistry;
 import org.opensearch.index.IndexService;
+import org.opensearch.index.engine.dataformat.DocumentInput;
 import org.opensearch.index.mapper.DocumentMapper;
+import org.opensearch.index.mapper.MappedFieldType;
 import org.opensearch.index.mapper.MapperService;
 import org.opensearch.index.mapper.ParsedDocument;
 import org.opensearch.index.mapper.SourceToParse;
@@ -48,7 +51,10 @@
 import org.opensearch.test.InternalSettingsPlugin;
 import org.opensearch.test.OpenSearchSingleNodeTestCase;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;
+import java.util.Map;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.nullValue;
@@ -116,4 +122,57 @@ public void testThatDisablingWorksWhenMerging() throws Exception {
 assertThat(docMapper.metadataMapper(SizeFieldMapper.class).enabled(), is(false));
 }
+ @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+ public void testSizeFieldMapperPluggableFormat() throws Exception {
+ String mapping = XContentFactory.jsonBuilder()
+ .startObject()
+ .startObject("_size")
+ .field("enabled", true)
+ .endObject()
+ .endObject()
+ .toString();
+ Settings settings = Settings.builder().put("index.pluggable.dataformat.enabled", true).build();
+ IndexService service = createIndex("test", settings);
+ DocumentMapper docMapper = service.mapperService()
+ .merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+
+ TestDocumentInput docInput = new TestDocumentInput();
+ docMapper.parse(
+ new SourceToParse(
+ "test",
+ "1",
+ BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()),
+ MediaTypeRegistry.JSON
+ ),
+ docInput
+ );
+
+ // SizeFieldMapper.postParse adds the source length via documentInput when pluggable format is enabled
+ boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("_size"));
+ assertTrue("Expected _size field captured via postParse pluggable format path", found);
+ }
+
+ private static class TestDocumentInput implements DocumentInput {
+ private final List<Map.Entry<MappedFieldType, Object>> capturedFields = new ArrayList<>();
+
+ @Override
+ public Object getFinalInput() {
+ return null;
+ }
+
+ @Override
+ public void addField(MappedFieldType fieldType, Object value) {
+ capturedFields.add(Map.entry(fieldType, value));
+ }
+
+ @Override
+ public void setRowId(String rowIdFieldName, long rowId) {}
+
+ @Override
+ public void close() {}
+
+ public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() {
+ return capturedFields;
+ }
+ }
 }
diff --git a/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java b/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java
index dc966a3dfc50f..f16c9a40390d6 100644
--- a/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java
+++ b/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java
@@ -99,7 +99,11 @@ public void postParse(ParseContext context) throws IOException {
 return;
 }
 final int value = context.sourceToParse().source().length();
- context.doc().addAll(NumberType.INTEGER.createFields(name(), value, true, true, false, true));
+ if (isPluggableDataFormatFeatureEnabled(context)) {
+ context.documentInput().addField(fieldType(), value);
+ } else {
+ context.doc().addAll(NumberType.INTEGER.createFields(name(), value, true, true, false, true));
+ }
 }

 @Override
diff --git a/sandbox/build.gradle b/sandbox/build.gradle
index 379595a4ba087..d98658d071941 100644
--- a/sandbox/build.gradle
+++ b/sandbox/build.gradle
@@ -6,29 +6,50 @@
 * compatible open source license.
 */
+import org.gradle.api.publish.plugins.PublishingPlugin
+
 /**
 * This module provides a space in OpenSearch for the community to easily experiment with new ideas and innovate.
 * Ideally, this is where an experimental features will reside before it can be promoted to the corresponding directory
 * in the project root. The sandbox module contains three subdirectories, that mirror the root libs, modules and
 * plugins directories, each with similar intention.
* - * All artifacts from the sandbox/libs and sandbox/modules will be included in the snapshot distributions automatically. + * All artifacts from the sandbox/libs and sandbox/modules can be included in the snapshot distributions. * During assembling distributions, however, we will check if the following two conditions are met, for including the * sandbox modules, - + * * 1. The distribution is a snapshot i.e. the build system property build.snapshot is set to true. We use this because, * it will prevent accidental inclusion of these artifacts in a release distribution. * - * 2. The sandbox.enabled system property is set to true. This new extra flag is added because we can exclude the - * modules from the snapshot distributions, if needed. For instance, we may want to run performance tests on snapshots - * without the sandbox modules. + * 2. The sandbox.enabled system property is set to true. This extra flag keeps sandbox artifacts disabled by default + * and allows opting in only when needed. * - * To build the distributions without the sandbox modules, - * ./gradlew assemble -Dsandbox.enabled=false + * To build the distributions with the sandbox modules, + * ./gradlew assemble -Dsandbox.enabled=true * - * Similarly we can run OpenSearch from source without the sandbox modules - * ./gradlew run -Dsandbox.enabled=false + * Similarly we can publish sandbox artifacts to Maven local with + * ./gradlew publishToMavenLocal -Dsandbox.enabled=true */ +def sandboxEnabled = System.getProperty("sandbox.enabled", "false") == "true" + subprojects { group = 'org.opensearch.sandbox' + + if (sandboxEnabled == false) { + afterEvaluate { + tasks.configureEach { task -> + if ( + task.group == PublishingPlugin.PUBLISH_TASK_GROUP + || task.name.startsWith('generatePomFileFor') + || task.name.startsWith('generateMetadataFileFor') + || task.name == 'validatePom' + || (task.name.startsWith('validate') && task.name.endsWith('Pom')) + ) { + task.enabled = false + task.setDependsOn([]) + task.onlyIf { false } + } + } + } + } } diff --git a/sandbox/libs/analytics-framework/build.gradle b/sandbox/libs/analytics-framework/build.gradle index 34a9b30d171ed..4deae9e98f7a0 100644 --- a/sandbox/libs/analytics-framework/build.gradle +++ b/sandbox/libs/analytics-framework/build.gradle @@ -38,6 +38,11 @@ dependencies { runtimeOnly 'org.codehaus.janino:janino:3.1.9' runtimeOnly 'org.codehaus.janino:commons-compiler:3.1.9' runtimeOnly 'org.jooq:joou-java-6:0.9.4' + + testImplementation(project(":test:framework")) { + exclude group: 'org.opensearch', module: 'opensearch-core' + exclude group: 'org.opensearch', module: 'opensearch-common' + } runtimeOnly 'com.jayway.jsonpath:json-path:2.9.0' runtimeOnly "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" runtimeOnly "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java index a785484da5604..51fe703c2b4d2 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java @@ -9,13 +9,24 @@ package org.opensearch.analytics.backend.jni; import java.lang.ref.Cleaner; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; 
/**
 * Base class for type-safe native pointer wrappers.
- * Provides automatic resource management and prevents use-after-close errors.
- * Subclasses must implement {@link #doClose()} to release native resources.
+ *
+ * <p>Provides automatic resource management, prevents use-after-close errors,
+ * and tracks all live handles in a global registry to detect stale pointer usage.
+ *
+ * <p>Subclasses must implement {@link #doClose()} to release native resources.
 * Cleaner is used to ensure resources are cleaned up even if the object is not explicitly closed.
+ *
+ * <p><b>Stale pointer detection</b>
+ *
+ * <p>All live native pointers are tracked in a global {@link #LIVE_HANDLES} set.
+ * When a handle is created, its pointer is registered. When closed, it is unregistered.
+ * Use {@link #isLivePointer(long)} to check if a raw pointer value is still valid
+ * before passing it to native code.
 */
public abstract class NativeHandle implements AutoCloseable {
@@ -28,6 +39,13 @@ public abstract class NativeHandle implements AutoCloseable {
 private static final Cleaner CLEANER = Cleaner.create();
+ /**
+ * Global registry of all live native pointers.
+ * Used to detect use-after-free: if a pointer is not in this set, it has
+ * either been closed or was never created by a NativeHandle.
+ */
+ private static final Set<Long> LIVE_HANDLES = ConcurrentHashMap.newKeySet();
+
 /**
 * Creates a new native handle.
 * @param ptr the native pointer (must not be 0)
@@ -38,6 +56,7 @@ protected NativeHandle(long ptr) {
 throw new IllegalArgumentException("Null native pointer");
 }
 this.ptr = ptr;
+ LIVE_HANDLES.add(ptr);
 this.cleanable = CLEANER.register(this, new CleanupAction(ptr, this::doClose));
 }
@@ -47,7 +66,7 @@ protected NativeHandle(long ptr) {
 */
 public void ensureOpen() {
 if (closed.get()) {
- throw new IllegalStateException("Handle already closed");
+ throw new IllegalStateException(getClass().getSimpleName() + " already closed (ptr=0x" + Long.toHexString(ptr) + ")");
 }
 }
@@ -58,12 +77,15 @@ public void ensureOpen() {
 */
 public long getPointer() {
 ensureOpen();
+ assert LIVE_HANDLES.contains(ptr) : "pointer 0x" + Long.toHexString(ptr) + " not in live registry";
 return ptr;
 }

 @Override
 public void close() {
 if (closed.compareAndSet(false, true)) {
+ assert LIVE_HANDLES.contains(ptr) : "closing handle not in live registry: 0x" + Long.toHexString(ptr);
+ LIVE_HANDLES.remove(ptr);
 cleanable.clean();
 }
 }
@@ -75,9 +97,55 @@ public void close() {
 */
 protected abstract void doClose();
+ // ---- Stale pointer detection (static API) ----
+
+ /**
+ * Checks if a raw pointer value corresponds to a live, open NativeHandle.
+ * Use this before passing raw pointer values to native code to detect
+ * use-after-free bugs.
+ *
+ * @param ptr the raw pointer value to check
+ * @return true if the pointer is tracked and has not been closed
+ */
+ public static boolean isLivePointer(long ptr) {
+ return LIVE_HANDLES.contains(ptr);
+ }
+
+ /**
+ * Validates that a raw pointer value is live, throwing if it is stale or unknown.
+ * Use this as a guard before FFM downcalls that accept raw pointer arguments.
+ *
+ * @param ptr the raw pointer value to validate
+ * @param name a descriptive name for error messages (e.g., "stream", "reader")
+ * @throws IllegalArgumentException if the pointer is null (0)
+ * @throws IllegalStateException if the pointer is not in the live handle registry
+ */
+ public static void validatePointer(long ptr, String name) {
+ if (ptr == NULL_POINTER) {
+ throw new IllegalArgumentException(name + " pointer is null (0)");
+ }
+ if (LIVE_HANDLES.contains(ptr) == false) {
+ throw new IllegalStateException(
+ name
+ + " pointer 0x"
+ + Long.toHexString(ptr)
+ + " is not a live handle — "
+ + "it may have been closed or was never created by NativeHandle"
+ );
+ }
+ }
+
+ /**
+ * Returns the number of currently live handles. Useful for leak detection in tests.
+ *
+ * @return the count of open native handles
+ */
+ public static int liveHandleCount() {
+ return LIVE_HANDLES.size();
+ }
+
 /**
 * Cleans up the native resource.
- * Called by the cleaner when the handle is garbage collected.
+ * Called by the cleaner when the handle is garbage collected without explicit close.
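+ *
+ * <p>Explicit {@code close()} remains the primary path; the cleaner is only a safety net.
+ * Minimal lifecycle sketch ({@code MyHandle} is a hypothetical subclass):
+ * <pre>{@code
+ * long ptr = createNativeThing();   // hypothetical native create call
+ * MyHandle h = new MyHandle(ptr);   // registers ptr as live
+ * h.close();                        // unregisters ptr and runs doClose() exactly once
+ * assert NativeHandle.isLivePointer(ptr) == false;
+ * }</pre>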
*/ private static final class CleanupAction implements Runnable { private final long ptr; @@ -90,6 +158,7 @@ private static final class CleanupAction implements Runnable { @Override public void run() { + LIVE_HANDLES.remove(ptr); doClose.run(); } } diff --git a/sandbox/libs/analytics-framework/src/test/java/org/opensearch/analytics/backend/jni/NativeHandleTests.java b/sandbox/libs/analytics-framework/src/test/java/org/opensearch/analytics/backend/jni/NativeHandleTests.java new file mode 100644 index 0000000000000..d6ac2a3829427 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/test/java/org/opensearch/analytics/backend/jni/NativeHandleTests.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend.jni; + +import org.opensearch.test.OpenSearchTestCase; + +/** + * Tests for {@link NativeHandle} lifecycle, stale pointer detection, and registry tracking. + */ +public class NativeHandleTests extends OpenSearchTestCase { + + /** Concrete test handle that records whether doClose was called. */ + private static class TestHandle extends NativeHandle { + boolean closed; + + TestHandle(long ptr) { + super(ptr); + } + + @Override + protected void doClose() { + closed = true; + } + } + + public void testConstructorRejectsZeroPointer() { + expectThrows(IllegalArgumentException.class, () -> new TestHandle(0L)); + } + + public void testGetPointerReturnsValue() { + TestHandle handle = new TestHandle(42L); + assertEquals(42L, handle.getPointer()); + handle.close(); + } + + public void testGetPointerAfterCloseThrows() { + TestHandle handle = new TestHandle(42L); + handle.close(); + expectThrows(IllegalStateException.class, handle::getPointer); + } + + public void testCloseCallsDoClose() { + TestHandle handle = new TestHandle(42L); + assertFalse(handle.closed); + handle.close(); + assertTrue(handle.closed); + } + + public void testDoubleCloseIsIdempotent() { + TestHandle handle = new TestHandle(42L); + handle.close(); + assertTrue(handle.closed); + // Second close should not throw + handle.close(); + assertTrue(handle.closed); + } + + // ---- Stale pointer registry tests ---- + + public void testIsLivePointerTrueWhileOpen() { + TestHandle handle = new TestHandle(100L); + assertTrue(NativeHandle.isLivePointer(100L)); + handle.close(); + } + + public void testIsLivePointerFalseAfterClose() { + TestHandle handle = new TestHandle(101L); + handle.close(); + assertFalse(NativeHandle.isLivePointer(101L)); + } + + public void testIsLivePointerFalseForUnknownPointer() { + assertFalse(NativeHandle.isLivePointer(999999L)); + } + + public void testValidatePointerSucceedsWhileOpen() { + TestHandle handle = new TestHandle(200L); + // Should not throw + NativeHandle.validatePointer(200L, "test"); + handle.close(); + } + + public void testValidatePointerThrowsAfterClose() { + TestHandle handle = new TestHandle(201L); + handle.close(); + IllegalStateException ex = expectThrows(IllegalStateException.class, () -> NativeHandle.validatePointer(201L, "test")); + assertTrue(ex.getMessage().contains("test")); + assertTrue(ex.getMessage().contains("not a live handle")); + } + + public void testValidatePointerThrowsForNullPointer() { + expectThrows(IllegalArgumentException.class, () -> NativeHandle.validatePointer(0L, "test")); + } + + public void testValidatePointerThrowsForUnknownPointer() { + 
expectThrows(IllegalStateException.class, () -> NativeHandle.validatePointer(888888L, "unknown")); + } + + public void testLiveHandleCountTracksOpenHandles() { + int baseline = NativeHandle.liveHandleCount(); + TestHandle h1 = new TestHandle(301L); + TestHandle h2 = new TestHandle(302L); + assertEquals(baseline + 2, NativeHandle.liveHandleCount()); + + h1.close(); + assertEquals(baseline + 1, NativeHandle.liveHandleCount()); + + h2.close(); + assertEquals(baseline, NativeHandle.liveHandleCount()); + } + + public void testEnsureOpenMessageIncludesClassName() { + TestHandle handle = new TestHandle(400L); + handle.close(); + IllegalStateException ex = expectThrows(IllegalStateException.class, handle::ensureOpen); + assertTrue(ex.getMessage().contains("TestHandle")); + assertTrue(ex.getMessage().contains("0x190")); // 400 in hex + } +} diff --git a/sandbox/libs/dataformat-native/build.gradle b/sandbox/libs/dataformat-native/build.gradle new file mode 100644 index 0000000000000..301208fbfe22d --- /dev/null +++ b/sandbox/libs/dataformat-native/build.gradle @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +apply plugin: 'opensearch.build' + +description = 'Shared native bridge utilities for OpenSearch sandbox plugins' + +dependencies { + api project(':libs:opensearch-core') + api project(':libs:opensearch-common') + implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}" + compileOnly "org.apache.arrow:arrow-c-data:${versions.arrow}" + + testImplementation "com.carrotsearch.randomizedtesting:randomizedtesting-runner:${versions.randomizedrunner}" + testImplementation "junit:junit:${versions.junit}" + testImplementation "org.hamcrest:hamcrest:${versions.hamcrest}" + testImplementation(project(":test:framework")) { + exclude group: 'org.opensearch', module: 'opensearch-core' + exclude group: 'org.opensearch', module: 'opensearch-common' + } +} + +tasks.named('forbiddenApisMain').configure { + replaceSignatureFiles 'jdk-signatures' +} + +jarHell.enabled = false + +// FFM is stable since JDK 22. Override project-wide JDK 21 target for this module. +java { + sourceCompatibility = JavaVersion.toVersion(25) + targetCompatibility = JavaVersion.toVersion(25) +} + +// missingJavadoc hardcodes --release 21 which hides FFM types (stable since JDK 22). +// Disable for this module until the framework supports per-project release overrides. +tasks.matching { it.name == 'missingJavadoc' }.configureEach { + enabled = false +} + +tasks.withType(Test).configureEach { + jvmArgs += ['--enable-native-access=ALL-UNNAMED'] +} + +// ═══════════════════════════════════════════════════════════════════ +// Unified Rust native library build (FFM) +// ═══════════════════════════════════════════════════════════════════ + +def osName = System.getProperty('os.name').toLowerCase() +def libPrefix = osName.contains('windows') ? '' : 'lib' +def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so') +def nativeLibName = "opensearch_native" +def buildType = project.hasProperty('rustDebug') ? 
'debug' : 'release' +def rustWorkspaceDir = file("${projectDir}/rust") +def nativeLibFile = file("${rustWorkspaceDir}/target/${buildType}/${libPrefix}${nativeLibName}${libExtension}") + +task buildRustLibrary(type: Exec) { + description = 'Build the unified Rust native library from the Cargo workspace' + group = 'build' + workingDir rustWorkspaceDir + + def cargoExecutable = 'cargo' + def possibleCargoPaths = [ + System.getenv('HOME') + '/.cargo/bin/cargo', + '/usr/local/bin/cargo', + 'cargo' + ] + for (String path : possibleCargoPaths) { + if (new File(path).exists()) { cargoExecutable = path; break } + } + + def cargoArgs = [cargoExecutable, 'build', '-p', 'opensearch-native-lib'] + if (buildType == 'release') { cargoArgs.add('--release') } + commandLine cargoArgs + + inputs.files fileTree("${rustWorkspaceDir}/common/src") + inputs.files fileTree("${rustWorkspaceDir}/lib/src") + inputs.file "${rustWorkspaceDir}/Cargo.toml" + outputs.file nativeLibFile +} + +// Expose the native lib path so plugins can reference it for tests +ext.nativeLibPath = nativeLibFile + +test { + systemProperty 'tests.security.manager', 'false' + systemProperty 'native.lib.path', nativeLibFile.absolutePath + jvmArgs += ['--enable-native-access=ALL-UNNAMED'] + dependsOn buildRustLibrary +} + +tasks.named('thirdPartyAudit').configure { + ignoreMissingClasses( + 'org.osgi.framework.Bundle', + 'org.osgi.framework.BundleActivator', + 'org.osgi.framework.BundleContext', + 'org.osgi.framework.BundleEvent', + 'org.osgi.framework.FrameworkUtil', + 'org.osgi.framework.ServiceReference', + 'org.osgi.framework.ServiceRegistration', + 'org.osgi.framework.SynchronousBundleListener', + 'org.osgi.framework.wiring.BundleRevision', + 'org.osgi.framework.wiring.BundleWire', + 'org.osgi.framework.wiring.BundleWiring' + ) +} diff --git a/sandbox/libs/native-bridge-spi/licenses/log4j-api-2.25.3.jar.sha1 b/sandbox/libs/dataformat-native/licenses/log4j-api-2.25.3.jar.sha1 similarity index 100% rename from sandbox/libs/native-bridge-spi/licenses/log4j-api-2.25.3.jar.sha1 rename to sandbox/libs/dataformat-native/licenses/log4j-api-2.25.3.jar.sha1 diff --git a/sandbox/libs/native-bridge-spi/licenses/log4j-api-LICENSE.txt b/sandbox/libs/dataformat-native/licenses/log4j-api-LICENSE.txt similarity index 100% rename from sandbox/libs/native-bridge-spi/licenses/log4j-api-LICENSE.txt rename to sandbox/libs/dataformat-native/licenses/log4j-api-LICENSE.txt diff --git a/sandbox/libs/native-bridge-spi/licenses/log4j-api-NOTICE.txt b/sandbox/libs/dataformat-native/licenses/log4j-api-NOTICE.txt similarity index 100% rename from sandbox/libs/native-bridge-spi/licenses/log4j-api-NOTICE.txt rename to sandbox/libs/dataformat-native/licenses/log4j-api-NOTICE.txt diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/.cargo/config.toml b/sandbox/libs/dataformat-native/rust/.cargo/config.toml similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/jni/.cargo/config.toml rename to sandbox/libs/dataformat-native/rust/.cargo/config.toml diff --git a/sandbox/plugins/analytics-backend-datafusion/Cargo.toml b/sandbox/libs/dataformat-native/rust/Cargo.toml similarity index 62% rename from sandbox/plugins/analytics-backend-datafusion/Cargo.toml rename to sandbox/libs/dataformat-native/rust/Cargo.toml index c08e2bb08acc7..b34cd03bd2d40 100644 --- a/sandbox/plugins/analytics-backend-datafusion/Cargo.toml +++ b/sandbox/libs/dataformat-native/rust/Cargo.toml @@ -1,41 +1,65 @@ [workspace] resolver = "2" -members = ["jni", "jni-macros"] 
+members = [ + "common", + "macros", + "lib", + "../../../plugins/analytics-backend-datafusion/rust", + "../../../plugins/parquet-data-format/src/main/rust", +] [workspace.dependencies] +# Arrow / Parquet +arrow = { version = "57.3.0", features = ["ffi"] } +arrow-array = "57.3.0" +arrow-schema = "57.3.0" +arrow-buffer = "57.3.0" +parquet = "57.3.0" + +# DataFusion datafusion = "52.1.0" datafusion-expr = "52.1.0" datafusion-datasource = "52.1.0" datafusion-common = "52.1.0" datafusion-execution = "52.1.0" +datafusion-physical-expr = "52.1.0" datafusion-substrait = "52.1.0" -arrow = { version = "57.3.0", features = ["ffi"] } -arrow-array = "57.3.0" -arrow-schema = "57.3.0" - -parquet = "57.3.0" -object_store = "0.12.5" -url = "2.0" +# Async +tokio = { version = "1.0", features = ["full"] } +futures = "0.3" +tokio-stream = "0.1.17" +# Serialization prost = "0.14" substrait = "=0.62.0" -jni = "0.21" -tokio = { version = "1.0", features = ["full"] } -futures = "0.3" -tokio-stream = "0.1.17" -parking_lot = "0.12.5" -once_cell = "1.21.3" +# Logging log = "0.4" -num_cpus = "1.16" + +# Allocator mimalloc = { version = "0.1.48", default-features = false } + +# Misc +dashmap = "5.5" +num_cpus = "1.16" +object_store = "0.12.5" +url = "2.0" +tempfile = "3.0" +chrono = "0.4" +once_cell = "1.21.3" +parking_lot = "0.12.5" +lazy_static = "1.4.0" criterion = { version = "0.5", features = ["async_tokio"] } -tempfile = "3" + +# Internal +native-bridge-common = { path = "common" } [profile.release] lto = true codegen-units = 1 +incremental = true +debug = "line-tables-only" strip = false [profile.dev] @@ -43,3 +67,5 @@ opt-level = 1 lto = false codegen-units = 16 incremental = true +debug = "full" +strip = false diff --git a/sandbox/libs/dataformat-native/rust/common/Cargo.toml b/sandbox/libs/dataformat-native/rust/common/Cargo.toml new file mode 100644 index 0000000000000..64b2370a5ddaa --- /dev/null +++ b/sandbox/libs/dataformat-native/rust/common/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "native-bridge-common" +version = "0.1.0" +edition = "2021" +description = "Shared Rust utilities for OpenSearch native plugins" +license = "Apache-2.0" + +[lib] +crate-type = ["rlib"] + +[dependencies] +native-bridge-macros = { path = "../macros" } diff --git a/sandbox/libs/dataformat-native/rust/common/src/error.rs b/sandbox/libs/dataformat-native/rust/common/src/error.rs new file mode 100644 index 0000000000000..fc43129f2d1d1 --- /dev/null +++ b/sandbox/libs/dataformat-native/rust/common/src/error.rs @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +//! FFM error handling. +//! +//! Convention: FFM functions return `i64`. +//! - `>= 0` → success (value or pointer) +//! - `< 0` → error. Negate to get a pointer to a heap-allocated error. +//! Call `native_error_message` to read, `native_error_free` to free. + +use std::ffi::CString; +use std::os::raw::c_char; + +/// Heap-allocate the error message and return its pointer as a negative i64. +pub fn into_error_ptr(msg: String) -> i64 { + let c = CString::new(msg).unwrap_or_else(|_| CString::new("error contained null byte").unwrap()); + let ptr = c.into_raw(); + -(ptr as i64) +} + +/// Returns a pointer to the null-terminated error message. 
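+///
+/// Round trip under this convention (minimal sketch; `res` stands for any FFM return value):
+/// ```ignore
+/// let res = into_error_ptr("boom".to_string());    // res < 0
+/// let msg = unsafe { native_error_message(-res) }; // NUL-terminated *const c_char
+/// unsafe { native_error_free(-res) };              // free exactly once
+/// ```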
+#[no_mangle]
+pub unsafe extern "C" fn native_error_message(ptr: i64) -> *const c_char {
+    ptr as *const c_char
+}
+
+/// Frees a heap-allocated error string.
+#[no_mangle]
+pub unsafe extern "C" fn native_error_free(ptr: i64) {
+    if ptr != 0 {
+        let _ = CString::from_raw(ptr as *mut c_char);
+    }
+}
+
+/// Deliberately panics with the given message. For testing panic handling.
+#[no_mangle]
+pub unsafe extern "C" fn native_test_panic(msg_ptr: *const u8, msg_len: i64) -> i64 {
+    match ::std::panic::catch_unwind(::std::panic::AssertUnwindSafe(|| -> Result<i64, String> {
+        let msg = std::str::from_utf8_unchecked(std::slice::from_raw_parts(msg_ptr, msg_len as usize));
+        panic!("{}", msg);
+    })) {
+        Ok(Ok(v)) => v,
+        Ok(Err(msg)) => into_error_ptr(msg),
+        Err(panic) => {
+            let msg = if let Some(s) = panic.downcast_ref::<String>() {
+                s.clone()
+            } else if let Some(s) = panic.downcast_ref::<&str>() {
+                s.to_string()
+            } else {
+                "unknown panic".to_string()
+            };
+            into_error_ptr(msg)
+        }
+    }
+}
+
+/// Returns an error (not a panic) with the given message. For testing error handling.
+#[no_mangle]
+pub unsafe extern "C" fn native_test_error(msg_ptr: *const u8, msg_len: i64) -> i64 {
+    let msg = std::str::from_utf8_unchecked(std::slice::from_raw_parts(msg_ptr, msg_len as usize));
+    into_error_ptr(msg.to_string())
+}
+
+/// Validates a string from (ptr, len). Returns 0 on valid UTF-8, error pointer on invalid input.
+/// Used for testing input validation (null ptr, negative len, bad UTF-8).
+#[no_mangle]
+pub unsafe extern "C" fn native_test_validate_str(ptr: *const u8, len: i64) -> i64 {
+    if ptr.is_null() {
+        return into_error_ptr("null string pointer".to_string());
+    }
+    if len < 0 {
+        return into_error_ptr(format!("negative string length: {}", len));
+    }
+    let bytes = std::slice::from_raw_parts(ptr, len as usize);
+    match std::str::from_utf8(bytes) {
+        Ok(_) => 0,
+        Err(e) => into_error_ptr(format!("invalid UTF-8: {}", e)),
+    }
+}
diff --git a/sandbox/libs/dataformat-native/rust/common/src/lib.rs b/sandbox/libs/dataformat-native/rust/common/src/lib.rs
new file mode 100644
index 0000000000000..88302f600a4d9
--- /dev/null
+++ b/sandbox/libs/dataformat-native/rust/common/src/lib.rs
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+//! Shared Rust utilities for OpenSearch sandbox native plugins.
+
+pub mod error;
+pub mod logger;
+
+// Re-export the proc macro so plugins use `#[native_bridge_common::ffm_safe]`
+pub use native_bridge_macros::ffm_safe;
diff --git a/sandbox/libs/dataformat-native/rust/common/src/logger.rs b/sandbox/libs/dataformat-native/rust/common/src/logger.rs
new file mode 100644
index 0000000000000..9b0c15765599a
--- /dev/null
+++ b/sandbox/libs/dataformat-native/rust/common/src/logger.rs
@@ -0,0 +1,65 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+//! Rust→Java logging via FFM callback.
+//!
+//! Java registers a function pointer at startup via `native_logger_init`.
+//! Rust calls that pointer to log. No JNI.
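+//!
+//! Plugin-side usage (minimal sketch; assumes the caller crate depends on `native-bridge-common`):
+//! ```ignore
+//! use native_bridge_common::{log_info, log_error};
+//!
+//! log_info!("stream {} opened", stream_id);
+//! log_error!("read failed: {}", err);
+//! ```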
+ +use std::sync::atomic::{AtomicPtr, Ordering}; + +/// Callback signature: `void log(int level, const char* msg, long msg_len)` +type LogCallback = unsafe extern "C" fn(i32, *const u8, i64); + +static LOG_CALLBACK: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut()); + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(i32)] +pub enum LogLevel { + Debug = 0, + Info = 1, + Error = 2, +} + +/// Called by Java at startup to register the log callback. +#[no_mangle] +pub unsafe extern "C" fn native_logger_init(callback: LogCallback) { + LOG_CALLBACK.store(callback as *mut (), Ordering::Release); + log(LogLevel::Info, "Native logger initialized successfully"); +} + +pub fn log(level: LogLevel, message: &str) { + let ptr = LOG_CALLBACK.load(Ordering::Acquire); + if ptr.is_null() { + eprintln!("[RUST_LOG_FALLBACK] {:?}: {}", level, message); + return; + } + let callback: LogCallback = unsafe { std::mem::transmute(ptr) }; + unsafe { callback(level as i32, message.as_ptr(), message.len() as i64) }; +} + +#[macro_export] +macro_rules! log_debug { + ($($arg:tt)*) => { + $crate::logger::log($crate::logger::LogLevel::Debug, &format!($($arg)*)) + }; +} + +#[macro_export] +macro_rules! log_info { + ($($arg:tt)*) => { + $crate::logger::log($crate::logger::LogLevel::Info, &format!($($arg)*)) + }; +} + +#[macro_export] +macro_rules! log_error { + ($($arg:tt)*) => { + $crate::logger::log($crate::logger::LogLevel::Error, &format!($($arg)*)) + }; +} diff --git a/sandbox/libs/dataformat-native/rust/lib/Cargo.toml b/sandbox/libs/dataformat-native/rust/lib/Cargo.toml new file mode 100644 index 0000000000000..8fadefbef397e --- /dev/null +++ b/sandbox/libs/dataformat-native/rust/lib/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "opensearch-native-lib" +version = "0.1.0" +edition = "2021" +description = "Single cdylib linking all native modules" +license = "Apache-2.0" + +[lib] +name = "opensearch_native" +crate-type = ["cdylib"] + +[dependencies] +opensearch-datafusion = { path = "../../../../plugins/analytics-backend-datafusion/rust" } +opensearch-parquet-format = { path = "../../../../plugins/parquet-data-format/src/main/rust" } +native-bridge-common = { workspace = true } +mimalloc = { workspace = true } diff --git a/sandbox/libs/dataformat-native/rust/lib/src/lib.rs b/sandbox/libs/dataformat-native/rust/lib/src/lib.rs new file mode 100644 index 0000000000000..5efc63909effb --- /dev/null +++ b/sandbox/libs/dataformat-native/rust/lib/src/lib.rs @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +// ═══════════════════════════════════════════════════════════════════════════════ +// Single cdylib for JDK FFM (Foreign Function & Memory API). +// +// Unlike the JNI approach (RegisterNatives, classloader workarounds), FFM calls +// extern "C" functions directly via SymbolLookup + Linker.downcallHandle(). +// No JNIEnv, no JClass, no classloader binding — just plain C ABI. +// +// This crate: +// 1. Sets the global mimalloc allocator (shared across all plugin rlibs) +// 2. Pulls in plugin rlibs via extern crate (forces linker to include symbols) +// 3. 
All #[no_mangle] extern "C" functions from the plugin crates are +// automatically available for dlsym/SymbolLookup +// ═══════════════════════════════════════════════════════════════════════════════ + +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +// Pull in plugin rlibs — forces linker to include all #[no_mangle] symbols. +extern crate native_bridge_common; +extern crate opensearch_datafusion; +extern crate opensearch_parquet_format; diff --git a/sandbox/plugins/analytics-backend-datafusion/jni-macros/Cargo.toml b/sandbox/libs/dataformat-native/rust/macros/Cargo.toml similarity index 75% rename from sandbox/plugins/analytics-backend-datafusion/jni-macros/Cargo.toml rename to sandbox/libs/dataformat-native/rust/macros/Cargo.toml index 87af959b9922c..22d7dcf5bee72 100644 --- a/sandbox/plugins/analytics-backend-datafusion/jni-macros/Cargo.toml +++ b/sandbox/libs/dataformat-native/rust/macros/Cargo.toml @@ -1,7 +1,8 @@ [package] -name = "jni-macros" +name = "native-bridge-macros" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [lib] proc-macro = true diff --git a/sandbox/libs/dataformat-native/rust/macros/src/lib.rs b/sandbox/libs/dataformat-native/rust/macros/src/lib.rs new file mode 100644 index 0000000000000..3883358fc4963 --- /dev/null +++ b/sandbox/libs/dataformat-native/rust/macros/src/lib.rs @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +//! Proc macro for FFM bridge functions. +//! +//! Wraps an `extern "C"` function body with `catch_unwind`. The body must +//! return `Result`. On success the `i64` is returned directly. +//! On `Err` or panic, the error message is heap-allocated and returned as a +//! negative pointer (negated `Box::into_raw` address). +//! +//! Java checks: if result < 0, call `native_error_message(-result)` to get +//! the message, then `native_error_free(-result)` to free it. +//! +//! # Usage +//! +//! ```ignore +//! #[ffm_safe] +//! #[no_mangle] +//! pub unsafe extern "C" fn my_func(arg: i64) -> i64 { +//! do_work(arg).map_err(|e| e.to_string()) +//! } +//! ``` + +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, ItemFn}; + +#[proc_macro_attribute] +pub fn ffm_safe(_attr: TokenStream, item: TokenStream) -> TokenStream { + let input = parse_macro_input!(item as ItemFn); + let attrs = &input.attrs; + let vis = &input.vis; + let sig = &input.sig; + let body = &input.block; + + let expanded = quote! 
{
+        #(#attrs)*
+        #vis #sig {
+            match ::std::panic::catch_unwind(::std::panic::AssertUnwindSafe(
+                || -> ::std::result::Result<i64, String> #body
+            )) {
+                Ok(Ok(v)) => v,
+                Ok(Err(msg)) => native_bridge_common::error::into_error_ptr(msg),
+                Err(panic) => {
+                    let msg = if let Some(s) = panic.downcast_ref::<String>() {
+                        s.clone()
+                    } else if let Some(s) = panic.downcast_ref::<&str>() {
+                        s.to_string()
+                    } else {
+                        "unknown panic".to_string()
+                    };
+                    native_bridge_common::error::into_error_ptr(msg)
+                }
+            }
+        }
+    };
+
+    expanded.into()
+}
diff --git a/sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/ArrowExport.java b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/ArrowExport.java
similarity index 100%
rename from sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/ArrowExport.java
rename to sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/ArrowExport.java
diff --git a/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/NativeCall.java b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/NativeCall.java
new file mode 100644
index 0000000000000..e5a2de8e92f1c
--- /dev/null
+++ b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/NativeCall.java
@@ -0,0 +1,261 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.nativebridge.spi;
+
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Thin wrapper around a confined {@link Arena} for FFM calls.
+ *
+ * <p><b>Arena strategy</b>
+ *
+ * <ul>
+ *   <li><b>Confined Arena</b> (this class) — for short-lived, per-call allocations: strings
+ * passed to native functions, out-pointers for return values, temp buffers. These are created at
+ * call start and closed immediately after — deterministic and cheap.
+ * Confined means single-thread only, which is fine since FFM calls are synchronous.</li>
+ *   <li><b>Global Arena</b> ({@link NativeLibraryLoader}) — for the {@link java.lang.foreign.SymbolLookup}
+ * that keeps the native library loaded for the JVM lifetime. Never freed.</li>
+ *   <li><b>No Arena needed</b> — for opaque native pointers ({@code long} values returned by
+ * functions like {@code createGlobalRuntime}, {@code createReader}). These are Rust heap
+ * addresses cast to {@code i64}. They are not {@link MemorySegment}s, don't need an Arena,
+ * and live until explicitly freed by the corresponding close/free function.</li>
+ * </ul>
+ *
+ * <pre>{@code
+ * try (var call = new NativeCall()) {
+ *     var name = call.str("hello");
+ *     long ptr = call.invoke(CREATE, name.segment(), name.len());
+ *     // ptr is a native pointer (long) — lives beyond this Arena
+ * }
+ * // Arena closed, temp string freed. ptr still valid.
+ * }</pre>
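+ *
+ * <p>Out-buffer round trip (sketch; {@code SERIALIZE} is a hypothetical downcall handle
+ * that writes into a caller-provided buffer and reports the byte count via an out-pointer):
+ * <pre>{@code
+ * try (var call = new NativeCall()) {
+ *     var out = call.outBuffer(4096);
+ *     call.invoke(SERIALIZE, ptr, out.data(), (long) out.capacity(), out.lenOut());
+ *     byte[] bytes = out.toByteArray(); // throws if native reported more than capacity
+ * }
+ * }</pre>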
+ */ +public final class NativeCall implements AutoCloseable { + + private final Arena arena; + private boolean closed; + + public NativeCall() { + this.arena = Arena.ofConfined(); + } + + private void ensureOpen() { + if (closed) { + throw new IllegalStateException("NativeCall already closed"); + } + } + + // ---- String marshaling (single UTF-8 encode) ---- + + /** + * A UTF-8 string segment paired with its byte length. + * Avoids double-encoding when both segment and length are needed. + * + * @param segment the native memory segment containing UTF-8 bytes + * @param len the byte length of the UTF-8 encoding + */ + public record Str(MemorySegment segment, long len) { + } + + /** + * Allocate a UTF-8 string and return both the segment and its byte length. + * Encodes the string exactly once. + * + * @param s the string to marshal (must not be null) + * @return a {@link Str} containing the segment and its byte length + * @throws NullPointerException if s is null + * @throws IllegalStateException if this NativeCall is closed + */ + public Str str(String s) { + ensureOpen(); + if (s == null) { + throw new NullPointerException("Cannot marshal null string to native"); + } + byte[] bytes = s.getBytes(StandardCharsets.UTF_8); + return new Str(arena.allocateFrom(ValueLayout.JAVA_BYTE, bytes), bytes.length); + } + + // ---- String array marshaling ---- + + /** + * Parallel pointer and length arrays for passing a string array to native code. + * Matches the Rust convention of {@code (*const *const u8, *const i64, count)}. + * + * @param ptrs memory segment containing pointers to each string's bytes + * @param lens memory segment containing the byte length of each string + * @param count the number of strings + */ + public record StrArray(MemorySegment ptrs, MemorySegment lens, long count) { + } + + /** + * Marshal a Java string array into parallel native arrays of pointers and lengths. + * + * @param strings the strings to marshal (must not be null, elements must not be null) + * @return a {@link StrArray} with pointer array, length array, and count + * @throws NullPointerException if strings or any element is null + * @throws IllegalStateException if this NativeCall is closed + */ + public StrArray strArray(String[] strings) { + ensureOpen(); + if (strings == null) { + throw new NullPointerException("Cannot marshal null string array to native"); + } + MemorySegment ptrs = arena.allocate(ValueLayout.ADDRESS, strings.length); + MemorySegment lens = arena.allocate(ValueLayout.JAVA_LONG, strings.length); + for (int i = 0; i < strings.length; i++) { + if (strings[i] == null) { + throw new NullPointerException("Cannot marshal null string at index " + i); + } + byte[] bytes = strings[i].getBytes(StandardCharsets.UTF_8); + ptrs.setAtIndex(ValueLayout.ADDRESS, i, arena.allocateFrom(ValueLayout.JAVA_BYTE, bytes)); + lens.setAtIndex(ValueLayout.JAVA_LONG, i, bytes.length); + } + return new StrArray(ptrs, lens, strings.length); + } + + // ---- Out-buffer with overflow detection ---- + + /** + * A bounded output buffer for native functions that write results into caller-provided memory. + * Provides overflow detection when reading the actual length written by native code. 
+ * + * @param data the buffer segment native code writes into + * @param lenOut out-pointer where native code writes the actual byte count + * @param capacity the allocated capacity of the data buffer + */ + public record OutBuffer(MemorySegment data, MemorySegment lenOut, int capacity) { + + /** + * Returns the actual length written by native code. + * + * @return the number of bytes written + * @throws IllegalStateException if the native code reported more bytes than the buffer capacity + */ + public int actualLength() { + int len = (int) lenOut.get(ValueLayout.JAVA_LONG, 0); + if (len < 0) { + throw new IllegalStateException("Native output reported negative length: " + len); + } + if (len > capacity) { + throw new IllegalStateException("Native output (" + len + " bytes) exceeds buffer capacity (" + capacity + " bytes)"); + } + return len; + } + + /** Extract the written bytes as a Java byte array. */ + public byte[] toByteArray() { + return data.asSlice(0, actualLength()).toArray(ValueLayout.JAVA_BYTE); + } + } + + /** + * Allocate a bounded output buffer with overflow detection. + * + * @param capacity the buffer size in bytes + * @return an {@link OutBuffer} containing the data segment, length out-pointer, and capacity + * @throws IllegalStateException if this NativeCall is closed + */ + public OutBuffer outBuffer(int capacity) { + ensureOpen(); + return new OutBuffer(arena.allocate(capacity), arena.allocate(ValueLayout.JAVA_LONG), capacity); + } + + // ---- Simple allocations ---- + + /** + * Allocate an out-pointer for {@code int}. + * Read with {@code seg.get(ValueLayout.JAVA_INT, 0)}. + */ + public MemorySegment intOut() { + ensureOpen(); + return arena.allocate(ValueLayout.JAVA_INT); + } + + /** Allocate an out-pointer for {@code long}. */ + public MemorySegment longOut() { + ensureOpen(); + return arena.allocate(ValueLayout.JAVA_LONG); + } + + /** Allocate a byte buffer of the given size. */ + public MemorySegment buf(int size) { + ensureOpen(); + return arena.allocate(size); + } + + /** Allocate a segment from a byte array. */ + public MemorySegment bytes(byte[] data) { + ensureOpen(); + if (data == null) { + throw new NullPointerException("Cannot marshal null byte array to native"); + } + return arena.allocateFrom(ValueLayout.JAVA_BYTE, data); + } + + // ---- Invocation ---- + + /** + * Invoke a MethodHandle that returns {@code long} and check the result. + * Throws {@link RuntimeException} on native error. + */ + public long invoke(MethodHandle handle, Object... args) { + ensureOpen(); + try { + long result = (long) handle.invokeWithArguments(args); + return NativeLibraryLoader.checkResult(result); + } catch (RuntimeException e) { + throw e; + } catch (Throwable t) { + throw new RuntimeException(t); + } + } + + /** + * Invoke a MethodHandle that returns {@code long} and check the result. + * Throws {@link IOException} on native error. + */ + public long invokeIO(MethodHandle handle, Object... args) throws IOException { + ensureOpen(); + try { + long result = (long) handle.invokeWithArguments(args); + return NativeLibraryLoader.checkResultIO(result); + } catch (IOException | RuntimeException e) { + throw e; + } catch (Throwable t) { + throw new IOException(t); + } + } + + /** + * Invoke a void MethodHandle. No return value, no error check. + * Use for fire-and-forget calls like close, shutdown, init. + * This is static because it does not require an Arena. + */ + public static void invokeVoid(MethodHandle handle, Object... 
args) { + try { + handle.invokeWithArguments(args); + } catch (RuntimeException e) { + throw e; + } catch (Throwable t) { + throw new RuntimeException(t); + } + } + + @Override + public void close() { + closed = true; + arena.close(); + } +} diff --git a/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/NativeLibraryLoader.java b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/NativeLibraryLoader.java new file mode 100644 index 0000000000000..16e22da13ab1a --- /dev/null +++ b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/NativeLibraryLoader.java @@ -0,0 +1,170 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.nativebridge.spi; + +import org.opensearch.common.SuppressForbidden; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; + +/** + * Loads the unified native library and provides a shared {@link SymbolLookup}. + * + *

<p>Uses the initialization-on-demand holder idiom for thread-safe lazy loading
+ * without explicit synchronization. The JVM guarantees that the holder class is
+ * initialized exactly once, on first access, with full happens-before semantics.
+ *
+ * <p>
Error convention: FFM functions return {@code i64}. If {@code result < 0}, + * negate it to get a pointer to a heap-allocated error string. Call + * {@link #checkResult(long)} to automatically convert errors to exceptions. + */ +public final class NativeLibraryLoader { + + private static final String LIBRARY_NAME = "opensearch_native"; + + private NativeLibraryLoader() {} + + // ---- Initialization-on-demand holder ---- + + /** + * Holder class for lazy, thread-safe initialization. + * The JVM guarantees this class is initialized exactly once on first access, + * with full happens-before semantics — no volatile, no synchronized needed. + */ + private static final class Holder { + static final SymbolLookup LOOKUP; + static final MethodHandle ERROR_MESSAGE; + static final MethodHandle ERROR_FREE; + + static { + LOOKUP = loadLibrary(); + Linker linker = Linker.nativeLinker(); + ERROR_MESSAGE = linker.downcallHandle( + LOOKUP.find("native_error_message").orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.JAVA_LONG) + ); + ERROR_FREE = linker.downcallHandle( + LOOKUP.find("native_error_free").orElseThrow(), + FunctionDescriptor.ofVoid(ValueLayout.JAVA_LONG) + ); + // Register the Rust→Java log callback + LOOKUP.find("native_logger_init").ifPresent(sym -> RustLoggerBridge.register(linker, sym)); + } + } + + /** Returns the shared {@link SymbolLookup}. Loads the library on first call. */ + public static SymbolLookup symbolLookup() { + return Holder.LOOKUP; + } + + // ---- Error handling ---- + + /** + * Reads a native error message from the given error pointer and frees it. + * The error pointer must have been produced by Rust's {@code into_error_ptr}. + * + * @param errPtr the positive error pointer (already negated by the caller) + * @return the error message string + */ + private static String readAndFreeError(long errPtr) { + if (errPtr == 0) { + return "native error with null pointer"; + } + String msg; + try { + MemorySegment msgSeg = (MemorySegment) Holder.ERROR_MESSAGE.invokeExact(errPtr); + // CString is null-terminated — reinterpret to max addressable range, + // getString stops at the null terminator. This is the standard FFM + // pattern for reading C strings of unknown length. + msg = msgSeg.reinterpret(Long.MAX_VALUE).getString(0); + } catch (Throwable t) { + msg = "failed to read native error (ptr=0x" + Long.toHexString(errPtr) + ")"; + } finally { + try { + Holder.ERROR_FREE.invokeExact(errPtr); + } catch (Throwable ignored) {} + } + return msg; + } + + /** + * Checks an FFM result. If {@code >= 0}, returns it. If {@code < 0}, reads + * the native error message, frees it, and throws a {@link RuntimeException}. + */ + public static long checkResult(long result) { + if (result >= 0) { + return result; + } + throw new RuntimeException(readAndFreeError(-result)); + } + + /** + * Same as {@link #checkResult} but throws {@link IOException}. 
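+ *
+ * <p>Sketch of a call site ({@code OPEN} is a hypothetical downcall handle;
+ * {@link NativeCall#invokeIO} applies this check automatically):
+ * <pre>{@code
+ * try (var call = new NativeCall()) {
+ *     var path = call.str("/tmp/data.bin");
+ *     long reader = call.invokeIO(OPEN, path.segment(), path.len()); // IOException on error
+ * }
+ * }</pre>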
+ */ + public static long checkResultIO(long result) throws IOException { + if (result >= 0) { + return result; + } + throw new IOException(readAndFreeError(-result)); + } + + // ---- Library loading ---- + + @SuppressForbidden(reason = "Needs temp directory to extract native library from classpath") + private static SymbolLookup loadLibrary() { + String libFile = PlatformHelper.getPlatformLibraryName(LIBRARY_NAME); + + // Try java.library.path + String javaLibPath = System.getProperty("java.library.path", ""); + for (String dir : javaLibPath.split(System.getProperty("path.separator"))) { + if (dir.isEmpty()) { + continue; + } + Path candidate = Path.of(dir, libFile); + if (Files.exists(candidate)) { + return SymbolLookup.libraryLookup(candidate, Arena.global()); + } + } + + // Try classpath resources + String platformDir = PlatformHelper.getPlatformDirectory(); + String resourcePath = "/native/" + platformDir + "/" + libFile; + try (InputStream is = NativeLibraryLoader.class.getResourceAsStream(resourcePath)) { + if (is != null) { + Path tempDir = Files.createTempDirectory("opensearch-native-"); + Path tempLib = tempDir.resolve(libFile); + Files.copy(is, tempLib, StandardCopyOption.REPLACE_EXISTING); + tempLib.toFile().setExecutable(true); + tempLib.toFile().deleteOnExit(); + tempDir.toFile().deleteOnExit(); + return SymbolLookup.libraryLookup(tempLib, Arena.global()); + } + } catch (IOException e) { + // fall through + } + + // Try native.lib.path system property (for tests) + String nativeLibPath = System.getProperty("native.lib.path"); + if (nativeLibPath != null) { + return SymbolLookup.libraryLookup(Path.of(nativeLibPath), Arena.global()); + } + + throw new RuntimeException("Failed to load native library '" + LIBRARY_NAME + "'"); + } +} diff --git a/sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/PlatformHelper.java b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/PlatformHelper.java similarity index 100% rename from sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/PlatformHelper.java rename to sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/PlatformHelper.java diff --git a/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/RustLoggerBridge.java b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/RustLoggerBridge.java new file mode 100644 index 0000000000000..ce7a9ce5cbd93 --- /dev/null +++ b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/RustLoggerBridge.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.nativebridge.spi; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.nio.charset.StandardCharsets; + +/** + * Bridge that allows Rust to log through Java's Log4j via an FFM callback. + * + *
<p>
At startup, {@link NativeLibraryLoader} registers {@link #log} as a function pointer + * with the native library. Rust calls it directly — no JNI. + */ +public class RustLoggerBridge { + + private static final Logger logger = LogManager.getLogger(RustLoggerBridge.class); + + /** + * Called from Rust via the registered function pointer. + * Signature must match: {@code void(int level, MemorySegment msgPtr, long msgLen)} + */ + static void log(int level, MemorySegment msgPtr, long msgLen) { + String message = new String(msgPtr.reinterpret(msgLen).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8); + switch (level) { + case 0 -> logger.debug(message); + case 1 -> logger.info(message); + case 2 -> logger.error(message); + default -> logger.warn(message); + } + } + + /** + * Creates an upcall stub for {@link #log} and registers it with the native library. + * Called once by {@link NativeLibraryLoader}. + */ + static void register(Linker linker, MemorySegment initLoggerSymbol) { + try { + MethodHandle logHandle = MethodHandles.lookup() + .findStatic(RustLoggerBridge.class, "log", MethodType.methodType(void.class, int.class, MemorySegment.class, long.class)); + MemorySegment upcallStub = linker.upcallStub( + logHandle, + FunctionDescriptor.ofVoid(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG), + Arena.global() + ); + MethodHandle initLogger = linker.downcallHandle(initLoggerSymbol, FunctionDescriptor.ofVoid(ValueLayout.ADDRESS)); + initLogger.invokeExact(upcallStub); + } catch (Throwable t) { + logger.error("Failed to register native logger callback", t); + } + } + + private RustLoggerBridge() {} +} diff --git a/sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/package-info.java b/sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/package-info.java similarity index 100% rename from sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/package-info.java rename to sandbox/libs/dataformat-native/src/main/java/org/opensearch/nativebridge/spi/package-info.java diff --git a/sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/NativePanicHandlingTests.java b/sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/NativePanicHandlingTests.java new file mode 100644 index 0000000000000..b592048706a8e --- /dev/null +++ b/sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/NativePanicHandlingTests.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.nativebridge.spi; + +import org.opensearch.test.OpenSearchTestCase; + +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/** + * Tests that native panics and errors are properly caught and surfaced as Java exceptions. 
+ */ +public class NativePanicHandlingTests extends OpenSearchTestCase { + + private static final MethodHandle TEST_PANIC; + private static final MethodHandle TEST_ERROR; + private static final MethodHandle TEST_VALIDATE_STR; + + static { + SymbolLookup lib = NativeLibraryLoader.symbolLookup(); + Linker linker = Linker.nativeLinker(); + TEST_PANIC = linker.downcallHandle( + lib.find("native_test_panic").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG) + ); + TEST_ERROR = linker.downcallHandle( + lib.find("native_test_error").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG) + ); + TEST_VALIDATE_STR = linker.downcallHandle( + lib.find("native_test_validate_str").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG) + ); + } + + public void testPanicIsCaughtAsException() { + RuntimeException ex = expectThrows(RuntimeException.class, () -> { + try (var call = new NativeCall()) { + var msg = call.str("test panic message"); + call.invoke(TEST_PANIC, msg.segment(), msg.len()); + } + }); + assertTrue("Should contain panic message, got: " + ex.getMessage(), ex.getMessage().contains("test panic message")); + } + + public void testErrorIsCaughtAsException() { + RuntimeException ex = expectThrows(RuntimeException.class, () -> { + try (var call = new NativeCall()) { + var msg = call.str("test error message"); + call.invoke(TEST_ERROR, msg.segment(), msg.len()); + } + }); + assertTrue("Should contain error message, got: " + ex.getMessage(), ex.getMessage().contains("test error message")); + } + + public void testPanicMessagePreservedThroughCheckResultIO() { + Exception ex = expectThrows(Exception.class, () -> { + try (var call = new NativeCall()) { + var msg = call.str("io panic test"); + call.invokeIO(TEST_PANIC, msg.segment(), msg.len()); + } + }); + assertTrue("Should contain panic message, got: " + ex.getMessage(), ex.getMessage().contains("io panic test")); + } + + public void testSuccessResultPassesThrough() throws Throwable { + // native_test_error with empty string still returns negative, so test with a known-good function. + // Use native_error_free(0) which is a no-op that doesn't crash — but it returns void. + // Instead, just verify that checkResult(0) returns 0 and checkResult(42) returns 42. 
+ assertEquals(0L, NativeLibraryLoader.checkResult(0)); + assertEquals(42L, NativeLibraryLoader.checkResult(42)); + assertEquals(Long.MAX_VALUE, NativeLibraryLoader.checkResult(Long.MAX_VALUE)); + } + + public void testNullPointerReturnsError() { + RuntimeException ex = expectThrows(RuntimeException.class, () -> { + try (var call = new NativeCall()) { + call.invoke(TEST_VALIDATE_STR, MemorySegment.NULL, 5L); + } + }); + assertTrue("Should mention null, got: " + ex.getMessage(), ex.getMessage().contains("null")); + } + + public void testNegativeLengthReturnsError() { + RuntimeException ex = expectThrows(RuntimeException.class, () -> { + try (var call = new NativeCall()) { + var s = call.str("hello"); + call.invoke(TEST_VALIDATE_STR, s.segment(), -1L); + } + }); + assertTrue("Should mention negative, got: " + ex.getMessage(), ex.getMessage().contains("negative")); + } + + public void testValidUtf8Succeeds() { + try (var call = new NativeCall()) { + var s = call.str("hello"); + long result = call.invoke(TEST_VALIDATE_STR, s.segment(), s.len()); + assertEquals(0L, result); + } + } +} diff --git a/sandbox/libs/native-bridge-spi/src/test/java/org/opensearch/nativebridge/spi/PlatformHelperTests.java b/sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/PlatformHelperTests.java similarity index 100% rename from sandbox/libs/native-bridge-spi/src/test/java/org/opensearch/nativebridge/spi/PlatformHelperTests.java rename to sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/PlatformHelperTests.java diff --git a/sandbox/libs/native-bridge-spi/src/test/java/org/opensearch/nativebridge/spi/RustLoggerBridgeTests.java b/sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/RustLoggerBridgeTests.java similarity index 59% rename from sandbox/libs/native-bridge-spi/src/test/java/org/opensearch/nativebridge/spi/RustLoggerBridgeTests.java rename to sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/RustLoggerBridgeTests.java index 6169feb6b152c..2e5b0f40db32b 100644 --- a/sandbox/libs/native-bridge-spi/src/test/java/org/opensearch/nativebridge/spi/RustLoggerBridgeTests.java +++ b/sandbox/libs/dataformat-native/src/test/java/org/opensearch/nativebridge/spi/RustLoggerBridgeTests.java @@ -15,53 +15,65 @@ import org.opensearch.test.MockLogAppender; import org.opensearch.test.OpenSearchTestCase; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.charset.StandardCharsets; + public class RustLoggerBridgeTests extends OpenSearchTestCase { + private static MemorySegment toSegment(Arena arena, String s) { + return arena.allocateFrom(ValueLayout.JAVA_BYTE, s.getBytes(StandardCharsets.UTF_8)); + } + public void testLogDebug() throws Exception { Logger logger = LogManager.getLogger(RustLoggerBridge.class); Configurator.setLevel(logger, Level.DEBUG); - try (MockLogAppender appender = MockLogAppender.createForLoggers(logger)) { + try (MockLogAppender appender = MockLogAppender.createForLoggers(logger); Arena arena = Arena.ofConfined()) { appender.addExpectation( new MockLogAppender.SeenEventExpectation("debug", RustLoggerBridge.class.getName(), Level.DEBUG, "debug message") ); - RustLoggerBridge.log(RustLoggerBridge.LogLevel.DEBUG.ordinal(), "debug message"); + RustLoggerBridge.log(0, toSegment(arena, "debug message"), "debug message".length()); appender.assertAllExpectationsMatched(); } } public void testLogInfo() throws Exception { - try 
(MockLogAppender appender = MockLogAppender.createForLoggers(LogManager.getLogger(RustLoggerBridge.class))) { + try ( + MockLogAppender appender = MockLogAppender.createForLoggers(LogManager.getLogger(RustLoggerBridge.class)); + Arena arena = Arena.ofConfined() + ) { appender.addExpectation( new MockLogAppender.SeenEventExpectation("info", RustLoggerBridge.class.getName(), Level.INFO, "info message") ); - RustLoggerBridge.log(RustLoggerBridge.LogLevel.INFO.ordinal(), "info message"); + RustLoggerBridge.log(1, toSegment(arena, "info message"), "info message".length()); appender.assertAllExpectationsMatched(); } } public void testLogError() throws Exception { - try (MockLogAppender appender = MockLogAppender.createForLoggers(LogManager.getLogger(RustLoggerBridge.class))) { + try ( + MockLogAppender appender = MockLogAppender.createForLoggers(LogManager.getLogger(RustLoggerBridge.class)); + Arena arena = Arena.ofConfined() + ) { appender.addExpectation( new MockLogAppender.SeenEventExpectation("error", RustLoggerBridge.class.getName(), Level.ERROR, "error message") ); - RustLoggerBridge.log(RustLoggerBridge.LogLevel.ERROR.ordinal(), "error message"); + RustLoggerBridge.log(2, toSegment(arena, "error message"), "error message".length()); appender.assertAllExpectationsMatched(); } } public void testLogInvalidLevelIsIgnored() throws Exception { - try (MockLogAppender appender = MockLogAppender.createForLoggers(LogManager.getLogger(RustLoggerBridge.class))) { + try ( + MockLogAppender appender = MockLogAppender.createForLoggers(LogManager.getLogger(RustLoggerBridge.class)); + Arena arena = Arena.ofConfined() + ) { appender.addExpectation( new MockLogAppender.UnseenEventExpectation("negative", RustLoggerBridge.class.getName(), Level.DEBUG, "bad") ); - appender.addExpectation( - new MockLogAppender.UnseenEventExpectation("negative-info", RustLoggerBridge.class.getName(), Level.INFO, "bad") - ); - appender.addExpectation( - new MockLogAppender.UnseenEventExpectation("negative-error", RustLoggerBridge.class.getName(), Level.ERROR, "bad") - ); - RustLoggerBridge.log(-1, "bad"); - RustLoggerBridge.log(99, "bad"); + RustLoggerBridge.log(-1, toSegment(arena, "bad"), 3); + RustLoggerBridge.log(99, toSegment(arena, "bad"), 3); appender.assertAllExpectationsMatched(); } } diff --git a/sandbox/libs/native-bridge-spi/build.gradle b/sandbox/libs/native-bridge-spi/build.gradle deleted file mode 100644 index b1d4ef3b66eac..0000000000000 --- a/sandbox/libs/native-bridge-spi/build.gradle +++ /dev/null @@ -1,49 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -apply plugin: 'opensearch.build' - -description = 'Shared native bridge utilities for OpenSearch sandbox plugins' - -dependencies { - api project(':libs:opensearch-core') - api project(':libs:opensearch-common') - implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}" - compileOnly "org.apache.arrow:arrow-c-data:${versions.arrow}" - - testImplementation "com.carrotsearch.randomizedtesting:randomizedtesting-runner:${versions.randomizedrunner}" - testImplementation "junit:junit:${versions.junit}" - testImplementation "org.hamcrest:hamcrest:${versions.hamcrest}" - testImplementation(project(":test:framework")) { - exclude group: 'org.opensearch', module: 'opensearch-core' - exclude group: 'org.opensearch', module: 'opensearch-common' - } -} - -tasks.named('forbiddenApisMain').configure { - replaceSignatureFiles 'jdk-signatures' -} - -jarHell.enabled = false - -tasks.named('thirdPartyAudit').configure { - ignoreMissingClasses( - // from log4j - 'org.osgi.framework.Bundle', - 'org.osgi.framework.BundleActivator', - 'org.osgi.framework.BundleContext', - 'org.osgi.framework.BundleEvent', - 'org.osgi.framework.FrameworkUtil', - 'org.osgi.framework.ServiceReference', - 'org.osgi.framework.ServiceRegistration', - 'org.osgi.framework.SynchronousBundleListener', - 'org.osgi.framework.wiring.BundleRevision', - 'org.osgi.framework.wiring.BundleWire', - 'org.osgi.framework.wiring.BundleWiring' - ) -} diff --git a/sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/RustLoggerBridge.java b/sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/RustLoggerBridge.java deleted file mode 100644 index 4c88b1ca2ad23..0000000000000 --- a/sandbox/libs/native-bridge-spi/src/main/java/org/opensearch/nativebridge/spi/RustLoggerBridge.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.nativebridge.spi; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -/** - * Bridge class that allows Rust code to log messages through Java's logging framework via JNI. - * - *
<p>
Rust calls this using the fully qualified class name: - * {@code org/opensearch/nativebridge/spi/RustLoggerBridge} - */ -public class RustLoggerBridge { - - // TODO - Extend logger to include the caller and implement lazy loading as well - private static final Logger logger = LogManager.getLogger(RustLoggerBridge.class); - - /** - * Log levels supported by the Rust-to-Java logging bridge. - */ - public enum LogLevel { - /** Debug level logging. */ - DEBUG, - /** Info level logging. */ - INFO, - /** Error level logging. */ - ERROR - } - - /** - * Called from Rust via JNI. - * @param level log level ordinal (0=DEBUG, 1=INFO, 2=ERROR) - * @param message the message to log - */ - public static void log(int level, String message) { - LogLevel[] levels = LogLevel.values(); - if (level < 0 || level >= levels.length) { - return; - } - switch (levels[level]) { - case DEBUG: - logger.debug(message); - break; - case INFO: - logger.info(message); - break; - case ERROR: - logger.error(message); - break; - } - } - - private RustLoggerBridge() {} -} diff --git a/sandbox/libs/native-bridge-spi/src/main/rust/Cargo.toml b/sandbox/libs/native-bridge-spi/src/main/rust/Cargo.toml deleted file mode 100644 index 81da54b58278d..0000000000000 --- a/sandbox/libs/native-bridge-spi/src/main/rust/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "native-bridge-spi" -version = "0.1.0" -edition = "2021" -description = "Shared Rust utilities for OpenSearch sandbox native plugins" -license = "Apache-2.0" - -[lib] -crate-type = ["rlib"] - -[dependencies] -jni = "0.21" diff --git a/sandbox/libs/native-bridge-spi/src/main/rust/src/lib.rs b/sandbox/libs/native-bridge-spi/src/main/rust/src/lib.rs deleted file mode 100644 index 0ae47f00e2867..0000000000000 --- a/sandbox/libs/native-bridge-spi/src/main/rust/src/lib.rs +++ /dev/null @@ -1,96 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -//! Shared Rust utilities for OpenSearch sandbox native plugins. - -use jni::objects::JValue; -use jni::{JNIEnv, JavaVM}; -use std::sync::OnceLock; - -static JAVA_VM: OnceLock<JavaVM> = OnceLock::new(); - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(i32)] -pub enum LogLevel { - Debug = 0, - Info = 1, - Error = 2, -} - -impl LogLevel { - fn as_i32(self) -> i32 { - self as i32 - } -} - -/// Initialize the logger from a JNIEnv reference. -pub fn init_logger_from_env(env: &JNIEnv) { - if JAVA_VM.get().is_some() { - // return early as env has already been initialized - return; - } - match env.get_java_vm() { - Ok(jvm) => { - if let Err(_) = JAVA_VM.set(jvm) { - // Another thread initialized it between our check and set; that's fine. - } else { - // Log through the Java bridge to confirm end-to-end setup; - // absence of this message indicates a logger initialization failure. 
- log(LogLevel::Info, "Native logger initialized successfully"); - } - } - Err(e) => eprintln!("[RUST_LOG_FALLBACK] Failed to get JavaVM: {}", e), - } -} - -fn log(level: LogLevel, message: &str) { - if let Some(jvm) = JAVA_VM.get() { - if let Ok(mut env) = jvm.attach_current_thread() { - let result = (|| -> Result<(), Box<dyn std::error::Error>> { - let class = env.find_class("org/opensearch/nativebridge/spi/RustLoggerBridge")?; - let java_message = env.new_string(message)?; - env.call_static_method( - class, - "log", - "(ILjava/lang/String;)V", - &[JValue::Int(level.as_i32()), (&java_message).into()], - )?; - Ok(()) - })(); - if result.is_err() { - eprintln!("[RUST_LOG_FALLBACK] {:?}: {}", level, message); - } - } - } -} - -#[macro_export] -macro_rules! log_debug { - ($($arg:tt)*) => { - $crate::__internal_log($crate::LogLevel::Debug, &format!($($arg)*)) - }; -} - -#[macro_export] -macro_rules! log_info { - ($($arg:tt)*) => { - $crate::__internal_log($crate::LogLevel::Info, &format!($($arg)*)) - }; -} - -#[macro_export] -macro_rules! log_error { - ($($arg:tt)*) => { - $crate::__internal_log($crate::LogLevel::Error, &format!($($arg)*)) - }; -} - -#[doc(hidden)] -pub fn __internal_log(level: LogLevel, message: &str) { - log(level, message); -} diff --git a/sandbox/plugins/analytics-backend-datafusion/build.gradle b/sandbox/plugins/analytics-backend-datafusion/build.gradle index f2ac0074900dc..855d2eaa6a0fc 100644 --- a/sandbox/plugins/analytics-backend-datafusion/build.gradle +++ b/sandbox/plugins/analytics-backend-datafusion/build.gradle @@ -12,16 +12,18 @@ opensearchplugin { extendedPlugins = ['analytics-engine'] } -def buildType = project.hasProperty('rustDebug') ? 'debug' : 'release' +java { sourceCompatibility = JavaVersion.toVersion(25); targetCompatibility = JavaVersion.toVersion(25) } dependencies { + // Shared native bridge lib (provides the unified .so and FFM SymbolLookup) + implementation project(':sandbox:libs:dataformat-native') + // Provided at runtime by the parent analytics-engine plugin; compile-only to avoid jar hell. compileOnly project(':sandbox:libs:analytics-framework') compileOnly "org.apache.logging.log4j:log4j-api:${versions.log4j}" compileOnly "org.apache.logging.log4j:log4j-core:${versions.log4j}" - // TODO: check if the dependencies need to be in analytics plugin // Apache Arrow dependencies implementation "org.apache.arrow:arrow-memory-core:18.3.0" implementation "org.apache.arrow:arrow-memory-unsafe:18.3.0" @@ -38,62 +40,11 @@ dependencies { implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" } -// Rust native library build -task buildRustLibrary(type: Exec) { - description = 'Build the Rust JNI library using Cargo' - group = 'build' - workingDir file('jni') - - def cargoExecutable = 'cargo' - [System.getenv('HOME') + '/.cargo/bin/cargo', '/usr/local/bin/cargo'].each { path -> - if (new File(path).exists()) { cargoExecutable = path } - } - - def cargoArgs = [cargoExecutable, 'build'] - if (buildType == 'release') { cargoArgs.add('--release') } - commandLine cargoArgs - - environment 'CARGO_TARGET_DIR', file('target').absolutePath - inputs.files fileTree('jni/src') - inputs.file 'jni/Cargo.toml' -} - -task copyNativeLibrary(type: Copy, dependsOn: buildRustLibrary) { - description = 'Copy the native library to Java resources' - group = 'build' - - def osName = System.getProperty('os.name').toLowerCase() - def libExtension = osName.contains('mac') ? 
'.dylib' : '.so' - - from file("target/${buildType}/libopensearch_datafusion_jni${libExtension}") - into file('src/main/resources/native') - filePermissions { unix(0644) } -} - -compileJava.dependsOn copyNativeLibrary -processResources.dependsOn copyNativeLibrary -sourcesJar.dependsOn copyNativeLibrary - -// copyNativeLibrary writes into src/main/resources/native — any task that scans -// the source tree must run after it to satisfy Gradle's implicit-dependency check. -tasks.named("forbiddenPatterns").configure { - dependsOn copyNativeLibrary - exclude '**/native/**' - exclude '**/*.parquet' -} - -for (def taskName : ["filepermissions", "spotlessJava", "spotlessJavaCheck"]) { - tasks.named(taskName).configure { dependsOn copyNativeLibrary } -} - -clean { - delete file('target') - delete file('src/main/resources/native') -} - test { jvmArgs += ["--add-opens", "java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED"] - systemProperty 'java.library.path', file('src/main/resources/native').absolutePath + jvmArgs += ["--enable-native-access=ALL-UNNAMED"] + systemProperty 'native.lib.path', project(':sandbox:libs:dataformat-native').ext.nativeLibPath.absolutePath + dependsOn ':sandbox:libs:dataformat-native:buildRustLibrary' } tasks.withType(JavaCompile).configureEach { @@ -104,9 +55,17 @@ tasks.withType(JavaCompile).configureEach { // TODO: Remove once back-end is built out with test suite testingConventions.enabled = false +tasks.named('forbiddenPatterns').configure { + exclude '**/*.parquet' +} + tasks.named('thirdPartyAudit').configure { ignoreViolations( 'org.apache.arrow.memory.util.MemoryUtil', 'org.apache.arrow.memory.util.MemoryUtil$1' ) } + +tasks.matching { it.name == 'missingJavadoc' }.configureEach { + enabled = false +} diff --git a/sandbox/plugins/analytics-backend-datafusion/jni-macros/src/lib.rs b/sandbox/plugins/analytics-backend-datafusion/jni-macros/src/lib.rs deleted file mode 100644 index 16adff9b1b2b1..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/jni-macros/src/lib.rs +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -// Proc macro that wraps a JNI extern function body with panic-catching. -// On panic, throws a Java RuntimeException and returns a default value. -// -// Usage: -// #[jni_safe] — for functions returning () -// #[jni_safe(default = 0)] — for functions returning jlong, jint, etc. -// #[jni_safe(default = std::ptr::null_mut())] — for functions returning pointers -// -// The first parameter of the function MUST be `env: JNIEnv` (or `mut env: JNIEnv`). - -use proc_macro::TokenStream; -use quote::quote; -use syn::{parse_macro_input, ItemFn, Expr, parse_str}; - -#[proc_macro_attribute] -pub fn jni_safe(attr: TokenStream, item: TokenStream) -> TokenStream { - let input = parse_macro_input!(item as ItemFn); - - // Parse default value from attribute, e.g. #[jni_safe(default = 0)] - let default_expr: Expr = if attr.is_empty() { - parse_str("()").unwrap() - } else { - let attr_str = attr.to_string(); - let value = attr_str - .strip_prefix("default") - .and_then(|s| s.trim().strip_prefix('=')) - .map(|s| s.trim()) - .unwrap_or("()"); - parse_str(value).unwrap() - }; - - let attrs = &input.attrs; - let vis = &input.vis; - let sig = &input.sig; - let body = &input.block; - - let expanded = quote! 
{ - #(#attrs)* - #vis #sig { - let __env_ptr = &mut env as *mut jni::JNIEnv; - match ::std::panic::catch_unwind(::std::panic::AssertUnwindSafe(|| { - let env = unsafe { &mut *__env_ptr }; - #body - })) { - Ok(result) => result, - Err(panic) => { - let msg = if let Some(s) = panic.downcast_ref::<String>() { - s.clone() - } else if let Some(s) = panic.downcast_ref::<&str>() { - s.to_string() - } else { - "unknown panic".to_string() - }; - let __env = unsafe { &mut *__env_ptr }; - let _ = __env.throw_new( - "java/lang/RuntimeException", - format!("Native panic: {}", msg), - ); - #default_expr - } - } - } - }; - - expanded.into() -} diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/lib.rs b/sandbox/plugins/analytics-backend-datafusion/jni/src/lib.rs deleted file mode 100644 index c27c8bdad8b99..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/jni/src/lib.rs +++ /dev/null @@ -1,376 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -//! JNI bridge layer. -//! -//! This module is a thin adapter between Java's JNI types and the bridge-agnostic -//! API in [`api`]. All core logic lives in `api.rs` and `query_executor.rs`. -//! When migrating to JDK FFM, replace this file with an `extern "C"` bridge -//! that calls the same `api::*` functions. - -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -use std::cell::RefCell; -use std::sync::{Arc, OnceLock}; - -use datafusion::common::DataFusionError; -use jni::objects::{JByteArray, JClass, JObject, JObjectArray, JString}; -use jni::sys::{jint, jlong}; -use jni::{JNIEnv, JavaVM}; -use log::error; - -pub mod api; -pub mod cross_rt_stream; -pub mod executor; -pub mod io; -pub mod query_executor; -pub mod runtime_manager; -pub mod util; - -use jni_macros::jni_safe; - -use crate::runtime_manager::RuntimeManager; -use crate::util::*; - -// Global state -static TOKIO_RUNTIME_MANAGER: OnceLock<Arc<RuntimeManager>> = OnceLock::new(); -static JAVA_VM: OnceLock<JavaVM> = OnceLock::new(); - -thread_local! 
{ - static THREAD_JNIENV: RefCell<Option<JNIEnv<'static>>> = RefCell::new(None); -} - -fn with_jni_env<F, R>(f: F) -> R -where - F: FnOnce(&mut JNIEnv) -> R, -{ - THREAD_JNIENV.with(|cell| { - let mut opt = cell.borrow_mut(); - if opt.is_none() { - let jvm = JAVA_VM.get().expect("JavaVM not initialized"); - let env = jvm - .attach_current_thread_permanently() - .expect("Failed to attach thread to JVM"); - *opt = Some(env); - } - f(opt.as_mut().unwrap()) - }) -} - -fn get_tokio_rt_manager() -> Result<&'static Arc<RuntimeManager>, DataFusionError> { - TOKIO_RUNTIME_MANAGER - .get() - .ok_or_else(|| DataFusionError::Execution("Runtime manager not initialized".to_string())) -} - -// Tokio runtime management -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_initTokioRuntimeManager( - mut env: JNIEnv, - _class: JClass, - cpu_threads: jint, -) { - JAVA_VM.get_or_init(|| env.get_java_vm().expect("Failed to get JavaVM")); - TOKIO_RUNTIME_MANAGER.get_or_init(|| Arc::new(RuntimeManager::new(cpu_threads as usize))); -} - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_shutdownTokioRuntimeManager( - mut env: JNIEnv, - _class: JClass, -) { - if let Some(mgr) = TOKIO_RUNTIME_MANAGER.get() { - mgr.shutdown(); - } -} - -// Create DataFusion global runtime with user defined configuration -#[jni_safe(default = 0)] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_createGlobalRuntime( - mut env: JNIEnv, - _class: JClass, - memory_pool_limit: jlong, - _cache_manager_ptr: jlong, - spill_dir: JString, - spill_limit: jlong, -) -> jlong { - let spill_dir: String = match env.get_string(&spill_dir) { - Ok(s) => s.into(), - Err(e) => { - let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid spill dir: {:?}", e)); - return 0; - } - }; - - match api::create_global_runtime(memory_pool_limit, &spill_dir, spill_limit) { - Ok(ptr) => ptr as jlong, - Err(e) => { - let _ = env.throw_new("java/lang/RuntimeException", e.to_string()); - 0 - } - } -} - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_closeGlobalRuntime( - mut env: JNIEnv, - _class: JClass, - ptr: jlong, -) { - unsafe { api::close_global_runtime(ptr as i64) }; -} - -// Create datafusion reader backed by shard view/catalog snapshot associated files -#[jni_safe(default = 0)] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_createDatafusionReader( - mut env: JNIEnv, - _class: JClass, - table_path: JString, - files: JObjectArray, -) -> jlong { - let table_path: String = match env.get_string(&table_path) { - Ok(s) => s.into(), - Err(e) => { - let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid table path: {:?}", e)); - return 0; - } - }; - let filenames = match parse_string_arr(env, files) { - Ok(f) => f, - Err(e) => { - let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid file list: {}", e)); - return 0; - } - }; - let tokio_rt_mgr = match get_tokio_rt_manager() { - Ok(m) => m, - Err(e) => { - let _ = env.throw_new("java/lang/IllegalStateException", e.to_string()); - return 0; - } - }; - - match api::create_reader(&table_path, filenames, tokio_rt_mgr) { - Ok(ptr) => ptr as jlong, - Err(e) => { - let _ = env.throw_new("java/lang/RuntimeException", e.to_string()); - 0 - } - } -} - - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_closeDatafusionReader( - 
mut env: JNIEnv, - _class: JClass, - ptr: jlong, -) { - unsafe { api::close_reader(ptr as i64) }; -} - -// Executes the query for the substrait plan and returns a stream handle to listener -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_executeQueryAsync( - mut env: JNIEnv, - _class: JClass, - shard_view_ptr: jlong, - table_name: JString, - substrait_bytes: JObject, - runtime_ptr: jlong, - listener: JObject, -) { - let tokio_rt_mgr = match get_tokio_rt_manager() { - Ok(m) => m, - Err(e) => { - set_action_listener_error(env, listener, &e); - return; - } - }; - - let table_name: String = match env.get_string(&JString::from(table_name)) { - Ok(s) => s.into(), - Err(e) => { - set_action_listener_error(env, listener, &DataFusionError::Execution(format!("Invalid table name: {}", e))); - return; - } - }; - let plan_bytes_obj = unsafe { JByteArray::from_raw(substrait_bytes.as_raw()) }; - let plan_bytes = match env.convert_byte_array(plan_bytes_obj) { - Ok(b) => b, - Err(e) => { - set_action_listener_error(env, listener, &DataFusionError::Execution(format!("Failed to convert plan bytes: {}", e))); - return; - } - }; - let listener_ref = match env.new_global_ref(&listener) { - Ok(r) => r, - Err(e) => { - set_action_listener_error(env, listener, &DataFusionError::Execution(format!("Failed to create global ref: {}", e))); - return; - } - }; - - // Delegate to bridge-agnostic API — bridge does the block_on - let result = tokio_rt_mgr.io_runtime.block_on(unsafe { - api::execute_query(shard_view_ptr as i64, &table_name, &plan_bytes, runtime_ptr as i64, tokio_rt_mgr) - }); - - with_jni_env(|env| match result { - Ok(stream_ptr) => set_action_listener_ok_global(env, &listener_ref, stream_ptr as jlong), - Err(e) => { - error!("Query execution failed: {}", e); - set_action_listener_error_global(env, &listener_ref, &e); - } - }); -} - -// Get schema for the stream -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_streamGetSchema( - mut env: JNIEnv, - _class: JClass, - stream_ptr: jlong, - listener: JObject, -) { - if stream_ptr == 0 { - set_action_listener_error(env, listener, &DataFusionError::Execution("Invalid stream pointer".to_string())); - return; - } - match unsafe { api::stream_get_schema(stream_ptr as i64) } { - Ok(schema_ptr) => set_action_listener_ok(env, listener, schema_ptr as jlong), - Err(e) => set_action_listener_error(env, listener, &e), - } -} - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_streamNext( - mut env: JNIEnv, - _class: JClass, - _runtime_ptr: jlong, - stream_ptr: jlong, - listener: JObject, -) { - let manager = match get_tokio_rt_manager() { - Ok(m) => m, - Err(e) => { - set_action_listener_error(env, listener, &e); - return; - } - }; - - let listener_ref = match env.new_global_ref(&listener) { - Ok(r) => r, - Err(e) => { - set_action_listener_error(env, listener, &DataFusionError::Execution(format!("Failed to create global ref: {}", e))); - return; - } - }; - - let result = manager.io_runtime.block_on(unsafe { api::stream_next(stream_ptr as i64) }); - - with_jni_env(|env| match result { - Ok(array_ptr) => set_action_listener_ok_global(env, &listener_ref, array_ptr as jlong), - Err(e) => { - error!("Stream next failed: {}", e); - set_action_listener_error_global(env, &listener_ref, &e); - } - }); -} - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_streamClose( - mut 
env: JNIEnv, - _class: JClass, - stream_ptr: jlong, -) { - unsafe { api::stream_close(stream_ptr as i64) }; -} - -// Only used for tests -#[jni_safe(default = std::ptr::null_mut())] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_sqlToSubstrait( - mut env: JNIEnv, - _class: JClass, - shard_view_ptr: jlong, - table_name: JString, - sql: JString, - runtime_ptr: jlong, -) -> jni::sys::jbyteArray { - let manager = TOKIO_RUNTIME_MANAGER.get().expect("Runtime manager not initialized"); - let table_name: String = env.get_string(&table_name).expect("Invalid table name").into(); - let sql: String = env.get_string(&sql).expect("Invalid SQL").into(); - - let result = unsafe { - api::sql_to_substrait(shard_view_ptr as i64, &table_name, &sql, runtime_ptr as i64, manager) - }; - - match result { - Ok(bytes) => env.byte_array_from_slice(&bytes).expect("byte array").into_raw(), - Err(e) => { - let _ = env.throw_new("java/lang/RuntimeException", e.to_string()); - std::ptr::null_mut() - } - } -} - -// Tests panic, only used for testing -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_testPanic( - mut env: JNIEnv, - _class: JClass, - message: JString, -) { - let msg: String = env.get_string(&message).expect("Invalid message").into(); - panic!("{}", msg); -} - -// Cache manager stubs -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_cacheManagerAddFiles( - mut env: JNIEnv, - _class: JClass, - _runtime_ptr: jlong, - _file_paths: JObjectArray, -) { - // TODO: wire to native cache manager -} - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_cacheManagerRemoveFiles( - mut env: JNIEnv, - _class: JClass, - _runtime_ptr: jlong, - _file_paths: JObjectArray, -) { - // TODO: wire to native cache manager -} - -#[jni_safe] -#[no_mangle] -pub extern "system" fn Java_org_opensearch_be_datafusion_jni_NativeBridge_initLogger( - mut env: JNIEnv, - _class: JClass, -) { - // TODO: wire Rust→Java logging bridge -} diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/macros.rs b/sandbox/plugins/analytics-backend-datafusion/jni/src/macros.rs deleted file mode 100644 index 29a5bb3f3b4cf..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/jni/src/macros.rs +++ /dev/null @@ -1,46 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -// Macro to wrap JNI entry points with panic-catching and exception conversion. - -/// Wraps a JNI function body with panic catching. On panic, throws a Java RuntimeException -/// and returns `$default`. Usage: -/// -/// ```ignore -/// jni_entry!(env, 0, { -/// // body that may panic — `env` is &mut JNIEnv -/// 42 as jlong -/// }) -/// ``` -#[macro_export] -macro_rules! 
jni_entry { - ($env:expr, $default:expr, $body:block) => {{ - use std::panic::{catch_unwind, AssertUnwindSafe}; - let env_ptr = $env as *mut jni::JNIEnv; - match catch_unwind(AssertUnwindSafe(|| { - let env = unsafe { &mut *env_ptr }; - (|| -> _ { $body })() - })) { - Ok(result) => result, - Err(panic) => { - let msg = if let Some(s) = panic.downcast_ref::<String>() { - s.clone() - } else if let Some(s) = panic.downcast_ref::<&str>() { - s.to_string() - } else { - "unknown panic".to_string() - }; - let env = unsafe { &mut *env_ptr }; - let _ = env.throw_new( - "java/lang/RuntimeException", - format!("Native panic: {}", msg), - ); - $default - } - } - }}; -} diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/util.rs b/sandbox/plugins/analytics-backend-datafusion/jni/src/util.rs deleted file mode 100644 index 2e2e4a0057de9..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/jni/src/util.rs +++ /dev/null @@ -1,161 +0,0 @@ - /* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -use datafusion::error::DataFusionError; -use jni::objects::{GlobalRef, JObject, JObjectArray, JString, JValue}; -use jni::sys::jlong; -use jni::JNIEnv; -use log::error; -use object_store::ObjectMeta; -use std::panic::{catch_unwind, AssertUnwindSafe}; - -// JNI utility functions. - -/// Extracts a human-readable message from a panic payload. -fn panic_message(payload: Box<dyn std::any::Any + Send>) -> String { - if let Some(s) = payload.downcast_ref::<String>() { - s.clone() - } else if let Some(s) = payload.downcast_ref::<&str>() { - s.to_string() - } else { - "Unknown Rust panic".to_string() - } -} - -/// Catches Rust panics at the JNI boundary and converts them to Java exceptions. -/// Returns `default` if a panic occurs, after throwing a RuntimeException on the Java side. -pub fn jni_safe<R, F>(env: &mut JNIEnv, default: R, f: F) -> R -where - F: FnOnce(&mut JNIEnv) -> R, -{ - match catch_unwind(AssertUnwindSafe(|| f(env))) { - Ok(result) => result, - Err(panic) => { - let msg = panic_message(panic); - let _ = env.throw_new("java/lang/RuntimeException", format!("Native panic: {}", msg)); - default - } - } -} -/// Parse a Java String[] into Vec<String>. -pub fn parse_string_arr(env: &mut JNIEnv, arr: JObjectArray) -> Result<Vec<String>, DataFusionError> { - let len = env.get_array_length(&arr).map_err(|e| DataFusionError::Execution(e.to_string()))?; - let mut result = Vec::with_capacity(len as usize); - for i in 0..len { - let obj = env.get_object_array_element(&arr, i).map_err(|e| DataFusionError::Execution(e.to_string()))?; - let jstr = JString::from(obj); - let s: String = env.get_string(&jstr).map_err(|e| DataFusionError::Execution(e.to_string()))?.into(); - result.push(s); - } - Ok(result) -} - -/// Build ObjectMeta for each file using the given object store. 
-pub async fn create_object_metas( - store: &dyn object_store::ObjectStore, - base_path: &str, - filenames: Vec<String>, -) -> Result<Vec<ObjectMeta>, DataFusionError> { - let mut metas = Vec::with_capacity(filenames.len()); - for filename in filenames { - let full_path = if filename.starts_with('/') || filename.contains(base_path) { - filename - } else { - format!("{}/{}", base_path.trim_end_matches('/'), filename) - }; - let path = object_store::path::Path::from(full_path.as_str()); - let meta = store.head(&path).await.map_err(|e| { - DataFusionError::Execution(format!("Failed to get object meta for {}: {}", full_path, e)) - })?; - metas.push(meta); - } - Ok(metas) -} - -/// Call ActionListener.onResponse(Long) via JNI. -/// Never panics — logs and returns on failure. -pub fn set_action_listener_ok(env: &mut JNIEnv, listener: JObject, value: jlong) { - let Ok(boxed) = env.call_static_method( - "java/lang/Long", "valueOf", "(J)Ljava/lang/Long;", &[value.into()] - ) else { - error!("Failed to box Long for ActionListener.onResponse"); - return; - }; - if let Err(e) = env.call_method( - listener, "onResponse", "(Ljava/lang/Object;)V", &[(&boxed).into()] - ) { - error!("Failed to call ActionListener.onResponse: {}", e); - } -} - -/// Call ActionListener.onResponse(Long) via GlobalRef. -/// Never panics — logs and returns on failure. -pub fn set_action_listener_ok_global(env: &mut JNIEnv, listener: &GlobalRef, value: jlong) { - let Ok(boxed) = env.call_static_method( - "java/lang/Long", "valueOf", "(J)Ljava/lang/Long;", &[value.into()] - ) else { - error!("Failed to box Long for ActionListener.onResponse (global)"); - return; - }; - if let Err(e) = env.call_method( - listener.as_obj(), "onResponse", "(Ljava/lang/Object;)V", &[(&boxed).into()] - ) { - error!("Failed to call ActionListener.onResponse (global): {}", e); - } -} - -/// Call ActionListener.onFailure(Exception) via JNI. -/// Never panics — logs and returns on failure. -pub fn set_action_listener_error( - env: &mut JNIEnv, - listener: JObject, - error: &DataFusionError, -) { - let Ok(msg) = env.new_string(error.to_string()) else { - log::error!("Failed to create error string for ActionListener.onFailure"); - return; - }; - let Ok(exception) = env.new_object( - "java/lang/RuntimeException", - "(Ljava/lang/String;)V", - &[JValue::Object(&msg)], - ) else { - log::error!("Failed to create RuntimeException for ActionListener.onFailure"); - return; - }; - if let Err(e) = env.call_method( - listener, "onFailure", "(Ljava/lang/Exception;)V", &[JValue::Object(&exception)] - ) { - log::error!("Failed to call ActionListener.onFailure: {}", e); - } -} - -/// Call ActionListener.onFailure(Exception) via GlobalRef. -/// Never panics — logs and returns on failure. 
-pub fn set_action_listener_error_global( - env: &mut JNIEnv, - listener: &GlobalRef, - error: &DataFusionError, -) { - let Ok(msg) = env.new_string(error.to_string()) else { - log::error!("Failed to create error string for ActionListener.onFailure (global)"); - return; - }; - let Ok(exception) = env.new_object( - "java/lang/RuntimeException", - "(Ljava/lang/String;)V", - &[JValue::Object(&msg)], - ) else { - log::error!("Failed to create RuntimeException for ActionListener.onFailure (global)"); - return; - }; - if let Err(e) = env.call_method( - listener.as_obj(), "onFailure", "(Ljava/lang/Exception;)V", &[JValue::Object(&exception)] - ) { - log::error!("Failed to call ActionListener.onFailure (global): {}", e); - } -} diff --git a/sandbox/plugins/analytics-backend-datafusion/rust/.cargo/config.toml b/sandbox/plugins/analytics-backend-datafusion/rust/.cargo/config.toml new file mode 100644 index 0000000000000..9e828deba797a --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/rust/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "symbol-mangling-version=v0"] diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/Cargo.toml b/sandbox/plugins/analytics-backend-datafusion/rust/Cargo.toml similarity index 84% rename from sandbox/plugins/analytics-backend-datafusion/jni/Cargo.toml rename to sandbox/plugins/analytics-backend-datafusion/rust/Cargo.toml index 56768c7a01bd0..b407a8152577e 100644 --- a/sandbox/plugins/analytics-backend-datafusion/jni/Cargo.toml +++ b/sandbox/plugins/analytics-backend-datafusion/rust/Cargo.toml @@ -1,12 +1,13 @@ [package] -name = "opensearch-datafusion-jni" +name = "opensearch-datafusion" version = "0.1.0" edition = "2021" license = "Apache-2.0" +workspace = "../../../libs/dataformat-native/rust" [lib] -name = "opensearch_datafusion_jni" -crate-type = ["cdylib", "rlib"] +name = "opensearch_datafusion" +crate-type = ["rlib"] [dependencies] datafusion = { workspace = true } @@ -27,7 +28,6 @@ url = { workspace = true } prost = { workspace = true } substrait = { workspace = true } -jni = { workspace = true } tokio = { workspace = true } futures = { workspace = true } tokio-stream = { workspace = true } @@ -36,7 +36,7 @@ once_cell = { workspace = true } log = { workspace = true } num_cpus = { workspace = true } mimalloc = { workspace = true } -jni-macros = { path = "../jni-macros" } +native-bridge-common = { workspace = true } [dev-dependencies] criterion = { workspace = true } diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/benches/query_bench.rs b/sandbox/plugins/analytics-backend-datafusion/rust/benches/query_bench.rs similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/jni/benches/query_bench.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/benches/query_bench.rs diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/api.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/api.rs similarity index 95% rename from sandbox/plugins/analytics-backend-datafusion/jni/src/api.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/src/api.rs index 32517625a861b..c59ee913972a1 100644 --- a/sandbox/plugins/analytics-backend-datafusion/jni/src/api.rs +++ b/sandbox/plugins/analytics-backend-datafusion/rust/src/api.rs @@ -166,7 +166,28 @@ use futures::TryStreamExt; use crate::cross_rt_stream::CrossRtStream; use crate::runtime_manager::RuntimeManager; -use crate::util::create_object_metas; + +/// Build 
ObjectMeta for each file using the given object store. +pub async fn create_object_metas( + store: &dyn object_store::ObjectStore, + base_path: &str, + filenames: Vec<String>, +) -> Result<Vec<ObjectMeta>, DataFusionError> { + let mut metas = Vec::with_capacity(filenames.len()); + for filename in filenames { + let full_path = if filename.starts_with('/') || filename.contains(base_path) { + filename + } else { + format!("{}/{}", base_path.trim_end_matches('/'), filename) + }; + let path = object_store::path::Path::from(full_path.as_str()); + let meta = store.head(&path).await.map_err(|e| { + DataFusionError::Execution(format!("Failed to get object meta for {}: {}", full_path, e)) + })?; + metas.push(meta); + } + Ok(metas) +} /// Opaque runtime handle returned to the caller. /// Contains the DataFusion RuntimeEnv (memory pool, disk spill, cache). diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/cross_rt_stream.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/cross_rt_stream.rs similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/jni/src/cross_rt_stream.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/src/cross_rt_stream.rs diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/executor.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/executor.rs similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/jni/src/executor.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/src/executor.rs diff --git a/sandbox/plugins/analytics-backend-datafusion/rust/src/ffm.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/ffm.rs new file mode 100644 index 0000000000000..f3030d4084627 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/rust/src/ffm.rs @@ -0,0 +1,166 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +//! FFM bridge for DataFusion.
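+//!
+//! Calling convention (a summary of the code below): strings cross the boundary as
+//! (pointer, length) pairs, string arrays as parallel pointer/length arrays (see
+//! `df_create_reader`), and fallible entry points wrapped in `#[ffm_safe]` return an `i64`
+//! that is non-negative on success. On failure the macro is assumed to encode the error as
+//! a negative value that the Java side decodes via `NativeLibraryLoader.checkResult`. A
+//! hypothetical entry point in this style would look like:
+//!
+//! ```ignore
+//! #[ffm_safe]
+//! #[no_mangle]
+//! pub unsafe extern "C" fn df_example(msg_ptr: *const u8, msg_len: i64) -> i64 {
+//!     let msg = str_from_raw(msg_ptr, msg_len)?; // an Err(String) becomes a negative return
+//!     Ok(msg.len() as i64)
+//! }
+//! ```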
+ +use std::slice; +use std::str; +use std::sync::Arc; + +use native_bridge_common::ffm_safe; +use parking_lot::RwLock; + +use crate::api; +use crate::runtime_manager::RuntimeManager; + +static TOKIO_RUNTIME_MANAGER: RwLock<Option<Arc<RuntimeManager>>> = RwLock::new(None); + +unsafe fn str_from_raw<'a>(ptr: *const u8, len: i64) -> Result<&'a str, String> { + if ptr.is_null() { + return Err("null string pointer".to_string()); + } + if len < 0 { + return Err(format!("negative string length: {}", len)); + } + let bytes = slice::from_raw_parts(ptr, len as usize); + str::from_utf8(bytes).map_err(|e| format!("invalid UTF-8: {}", e)) +} + +fn get_rt_manager() -> Result<Arc<RuntimeManager>, String> { + TOKIO_RUNTIME_MANAGER + .read() + .clone() + .ok_or_else(|| "Runtime manager not initialized".to_string()) +} + +#[no_mangle] +pub extern "C" fn df_init_runtime_manager(cpu_threads: i32) { + let mut guard = TOKIO_RUNTIME_MANAGER.write(); + *guard = Some(Arc::new(RuntimeManager::new(cpu_threads as usize))); +} + +#[no_mangle] +pub extern "C" fn df_shutdown_runtime_manager() { + let mgr = TOKIO_RUNTIME_MANAGER.write().take(); + if let Some(mgr) = mgr { + mgr.shutdown(); + } +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn df_create_global_runtime( + memory_pool_limit: i64, + spill_dir_ptr: *const u8, + spill_dir_len: i64, + spill_limit: i64, +) -> i64 { + let spill_dir = str_from_raw(spill_dir_ptr, spill_dir_len).map_err(|e| format!("df_create_global_runtime: {}", e))?; + api::create_global_runtime(memory_pool_limit, spill_dir, spill_limit) + .map_err(|e| e.to_string()) +} + +#[no_mangle] +pub unsafe extern "C" fn df_close_global_runtime(ptr: i64) { + api::close_global_runtime(ptr); +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn df_create_reader( + table_path_ptr: *const u8, + table_path_len: i64, + files_ptr: *const *const u8, + files_len_ptr: *const i64, + files_count: i64, +) -> i64 { + let table_path = str_from_raw(table_path_ptr, table_path_len).map_err(|e| format!("df_create_reader: {}", e))?; + let mut filenames = Vec::with_capacity(files_count as usize); + for i in 0..files_count as usize { + let ptr = *files_ptr.add(i); + let len = *files_len_ptr.add(i); + filenames.push(str_from_raw(ptr, len).map_err(|e| format!("df_create_reader: {}", e))?.to_string()); + } + let mgr = get_rt_manager()?; + api::create_reader(table_path, filenames, &mgr).map_err(|e| e.to_string()) +} + +#[no_mangle] +pub unsafe extern "C" fn df_close_reader(ptr: i64) { + api::close_reader(ptr); +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn df_execute_query( + shard_view_ptr: i64, + table_name_ptr: *const u8, + table_name_len: i64, + plan_ptr: *const u8, + plan_len: i64, + runtime_ptr: i64, +) -> i64 { + let mgr = get_rt_manager()?; + let table_name = str_from_raw(table_name_ptr, table_name_len).map_err(|e| format!("df_execute_query: {}", e))?; + let plan_bytes = slice::from_raw_parts(plan_ptr, plan_len as usize); + mgr.io_runtime + .block_on(api::execute_query(shard_view_ptr, table_name, plan_bytes, runtime_ptr, &mgr)) + .map_err(|e| e.to_string()) +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn df_stream_get_schema(stream_ptr: i64) -> i64 { + api::stream_get_schema(stream_ptr).map_err(|e| e.to_string()) +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn df_stream_next(stream_ptr: i64) -> i64 { + let mgr = get_rt_manager()?; + mgr.io_runtime + .block_on(api::stream_next(stream_ptr)) + .map_err(|e| e.to_string()) +} + +#[no_mangle] +pub unsafe extern "C" fn df_stream_close(stream_ptr: i64) { + api::stream_close(stream_ptr); 
+} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn df_sql_to_substrait( + shard_view_ptr: i64, + table_name_ptr: *const u8, + table_name_len: i64, + sql_ptr: *const u8, + sql_len: i64, + runtime_ptr: i64, + out_ptr: *mut u8, + out_cap: i64, + out_len: *mut i64, +) -> i64 { + let mgr = get_rt_manager()?; + let table_name = str_from_raw(table_name_ptr, table_name_len).map_err(|e| format!("df_sql_to_substrait: table_name: {}", e))?; + let sql = str_from_raw(sql_ptr, sql_len).map_err(|e| format!("df_sql_to_substrait: sql: {}", e))?; + let bytes = api::sql_to_substrait(shard_view_ptr, table_name, sql, runtime_ptr, &mgr) + .map_err(|e| e.to_string())?; + if bytes.len() > out_cap as usize { + return Err(format!( + "substrait plan size {} exceeds buffer capacity {}", + bytes.len(), + out_cap + )); + } + std::ptr::copy_nonoverlapping(bytes.as_ptr(), out_ptr, bytes.len()); + if !out_len.is_null() { + *out_len = bytes.len() as i64; + } + Ok(0) +} diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/io.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/io.rs similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/jni/src/io.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/src/io.rs diff --git a/sandbox/plugins/analytics-backend-datafusion/rust/src/lib.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/lib.rs new file mode 100644 index 0000000000000..0fbcbe5ac200d --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/rust/src/lib.rs @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +//! DataFusion native execution engine for OpenSearch. +//! +//! The bridge-agnostic API lives in [`api`]. The FFM bridge (`ffm.rs`) exports +//! `extern "C"` functions for JDK FFM. 
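+//!
+//! On the Java side these exports are bound with plain FFM downcalls; a sketch (handle
+//! name hypothetical, the real lookup lives in `NativeLibraryLoader`):
+//!
+//! ```text
+//! MethodHandle dfStreamNext = linker.downcallHandle(
+//!     lookup.find("df_stream_next").orElseThrow(),
+//!     FunctionDescriptor.of(JAVA_LONG, JAVA_LONG));
+//! long batchPtr = NativeLibraryLoader.checkResult((long) dfStreamNext.invokeExact(streamPtr));
+//! ```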
+ +pub mod api; +pub mod cross_rt_stream; +pub mod executor; +pub mod ffm; +pub mod io; +pub mod query_executor; +pub mod runtime_manager; diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/query_executor.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/query_executor.rs similarity index 97% rename from sandbox/plugins/analytics-backend-datafusion/jni/src/query_executor.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/src/query_executor.rs index c044f0d23c90c..c8db3c4a7991d 100644 --- a/sandbox/plugins/analytics-backend-datafusion/jni/src/query_executor.rs +++ b/sandbox/plugins/analytics-backend-datafusion/rust/src/query_executor.rs @@ -21,7 +21,6 @@ use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::execution::cache::cache_manager::CacheManagerConfig; use datafusion::execution::cache::{CacheAccessor, DefaultListFilesCache}; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; -use jni::sys::jlong; use log::error; use object_store::ObjectMeta; use prost::Message; @@ -40,7 +39,7 @@ pub async fn execute_query( plan_bytes: Vec<u8>, runtime: &DataFusionRuntime, cpu_executor: DedicatedExecutor, -) -> Result<jlong, DataFusionError> { +) -> Result<i64, DataFusionError> { // Pre-populate the list-files cache so DataFusion doesn't re-list the directory let list_file_cache = Arc::new(DefaultListFilesCache::default()); let table_scoped_path = datafusion::execution::cache::TableScopedPath { @@ -132,5 +131,5 @@ pub async fn execute_query( cross_rt_stream, ); - Ok(Box::into_raw(Box::new(wrapped)) as jlong) + Ok(Box::into_raw(Box::new(wrapped)) as i64) } diff --git a/sandbox/plugins/analytics-backend-datafusion/jni/src/runtime_manager.rs b/sandbox/plugins/analytics-backend-datafusion/rust/src/runtime_manager.rs similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/jni/src/runtime_manager.rs rename to sandbox/plugins/analytics-backend-datafusion/rust/src/runtime_manager.rs diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java index 8c72ae4069a9b..48d87a6ecfc18 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java @@ -12,7 +12,7 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.nativelib.NativeBridge; import org.opensearch.common.lifecycle.AbstractLifecycleComponent; import java.io.IOException; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java index 32aa6cce01b2b..c1c292470429b 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -9,7 +9,7 @@ package org.opensearch.be.datafusion; import org.opensearch.action.search.SearchShardTask; -import org.opensearch.be.datafusion.jni.StreamHandle; +import 
org.opensearch.be.datafusion.nativelib.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.IndexFilterTree; import org.opensearch.search.SearchExecutionContext; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java index daee9669cf7e2..966dbad5fd2e0 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java @@ -10,7 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.be.datafusion.jni.ReaderHandle; +import org.opensearch.be.datafusion.nativelib.ReaderHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.WriterFileSet; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java index b7ed6b3e2ade1..e558c69abc1ea 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java @@ -20,8 +20,8 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.opensearch.analytics.backend.EngineResultBatch; import org.opensearch.analytics.backend.EngineResultStream; -import org.opensearch.be.datafusion.jni.NativeBridge; -import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.be.datafusion.nativelib.NativeBridge; +import org.opensearch.be.datafusion.nativelib.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.core.action.ActionListener; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java index 03f36693e2d21..1a4596579aecf 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -12,7 +12,7 @@ import org.opensearch.analytics.backend.EngineResultStream; import org.opensearch.analytics.backend.ExecutionContext; import org.opensearch.analytics.backend.SearchExecEngine; -import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.be.datafusion.nativelib.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import java.io.IOException; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java index 4434ef53686d0..9c8f36dafed26 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java +++ 
b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java @@ -8,9 +8,9 @@ package org.opensearch.be.datafusion; -import org.opensearch.be.datafusion.jni.NativeBridge; -import org.opensearch.be.datafusion.jni.ReaderHandle; -import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.be.datafusion.nativelib.NativeBridge; +import org.opensearch.be.datafusion.nativelib.ReaderHandle; +import org.opensearch.be.datafusion.nativelib.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.core.action.ActionListener; import org.opensearch.index.engine.exec.EngineSearcher; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java index c75307c4e2047..229ccaac29e97 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java @@ -8,71 +8,61 @@ package org.opensearch.be.datafusion; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.analytics.backend.jni.NativeHandle; +import org.opensearch.be.datafusion.nativelib.NativeBridge; import org.opensearch.common.annotation.ExperimentalApi; -import java.io.Closeable; - /** - * Thread-safe wrapper around a native runtime pointer. + * Type-safe handle for the native DataFusion global runtime. *

- * Encapsulates the raw {@code long} so it cannot be copied or used after - * the runtime is destroyed. All consumers obtain the pointer via {@link #get()} - * which performs a liveness check on every call. + * Extends {@link NativeHandle} to get automatic resource management, a Cleaner-based + * GC safety net, stale pointer tracking, and double-close prevention. *
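+ * <p>A typical caller, sketched here for illustration (assumes {@code NativeHandle}
+ * implements {@link AutoCloseable}, which try-with-resources requires):
+ * <pre>{@code
+ * try (NativeRuntimeHandle runtime = new NativeRuntimeHandle(ptr)) {
+ *     long p = runtime.get(); // liveness-checked on every call
+ *     // ... pass p to NativeBridge methods ...
+ * } // close() invokes NativeBridge.closeGlobalRuntime(ptr)
+ * }</pre>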

- * Implements {@link Closeable} so it integrates with try-with-resources, - * {@code IOUtils.close()}, and leak detection infrastructure. + * The runtime pointer lives until {@link #close()} is called, which invokes + * {@link NativeBridge#closeGlobalRuntime(long)}. * * @opensearch.experimental */ @ExperimentalApi -public class NativeRuntimeHandle implements Closeable { - - private volatile long pointer; +public class NativeRuntimeHandle extends NativeHandle { /** - * Creates a handle wrapping the given native pointer. + * Creates a handle wrapping the given native runtime pointer. * * @param pointer the native runtime pointer (must be non-zero) * @throws IllegalArgumentException if pointer is zero */ public NativeRuntimeHandle(long pointer) { - if (pointer == 0L) { - throw new IllegalArgumentException("Cannot create NativeRuntimeHandle with null pointer"); - } - this.pointer = pointer; + super(pointer); } /** * Returns the native runtime pointer, checking that it is still live. + *

+ * This method preserves backward compatibility with callers that use + * {@code handle.get()} instead of {@code handle.getPointer()}. * + * @return the native runtime pointer * @throws IllegalStateException if the handle has been closed */ public long get() { - long ptr = pointer; - if (ptr == 0L) { - throw new IllegalStateException("Native runtime handle has been closed"); - } - return ptr; + return getPointer(); } /** * Returns true if the handle has not been closed. */ public boolean isOpen() { - return pointer != 0L; + try { + ensureOpen(); + return true; + } catch (IllegalStateException e) { + return false; + } } - /** - * Releases the native runtime. Idempotent and thread-safe. - * After this call, {@link #get()} will throw. - */ @Override - public synchronized void close() { - long ptr = pointer; - if (ptr != 0L) { - NativeBridge.closeGlobalRuntime(ptr); - pointer = 0L; - } + protected void doClose() { + NativeBridge.closeGlobalRuntime(ptr); } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java deleted file mode 100644 index db14b6d40ae4c..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.be.datafusion.jni; - -/** - * Core JNI bridge to native DataFusion library. - * All native method declarations are centralized here. - */ -public final class NativeBridge { - - private static volatile boolean loaded = false; - - static { - loadNativeLibrary(); - } - - private NativeBridge() {} - - private static synchronized void loadNativeLibrary() { - if (loaded) return; - try { - System.loadLibrary("opensearch_datafusion_jni"); - loaded = true; - } catch (UnsatisfiedLinkError e) { - throw new ExceptionInInitializerError("Failed to load native library opensearch_datafusion_jni: " + e.getMessage()); - } - } - - // ---- Tokio runtime management ---- - - /** - * Initializes the Tokio runtime manager with dedicated CPU and IO thread pools. - * Must be called once at node startup before any query execution. - * @param cpuThreads number of CPU threads for the dedicated executor - */ - public static native void initTokioRuntimeManager(int cpuThreads); - - /** - * Shuts down the Tokio runtime manager and all associated thread pools. - */ - public static native void shutdownTokioRuntimeManager(); - - // ---- DataFusion runtime ---- - - /** - * Creates a global DataFusion runtime with the given resource limits. - * @param memoryLimit the maximum memory in bytes - * @param cacheManagerPtr the native cache manager pointer (0 for no cache) - * @param spillDir the directory path for spill files - * @param spillLimit the maximum spill size in bytes - */ - public static native long createGlobalRuntime(long memoryLimit, long cacheManagerPtr, String spillDir, long spillLimit); - - /** - * Closes the global DataFusion runtime. - * @param ptr the native runtime pointer - */ - public static native void closeGlobalRuntime(long ptr); - - // ---- Reader management ---- - - /** - * Creates a native DataFusion reader. 
- * @param path the directory path containing data files - * @param files the array of file names to read - */ - public static native long createDatafusionReader(String path, String[] files); - - /** - * Closes the native DataFusion reader. - * @param ptr the native reader pointer - */ - public static native void closeDatafusionReader(long ptr); - - // ---- Query execution ---- - - /** - * Executes a substrait plan asynchronously against the given reader. - * The result stream pointer is delivered via the ActionListener callback. - * - * @param readerPtr the native reader pointer - * @param tableName the target table name - * @param substraitPlan the serialized substrait plan bytes - * @param runtimePtr the native runtime pointer - * @param listener callback receiving the stream pointer (Long) or error - */ - public static native void executeQueryAsync( - long readerPtr, - String tableName, - byte[] substraitPlan, - long runtimePtr, - org.opensearch.core.action.ActionListener listener - ); - - // ---- Stream operations ---- - - /** - * Returns the Arrow schema address for the given stream. - * Synchronous — schema is cached on the stream and fast to access. - * - * @param streamPtr the native stream pointer - * @param listener callback receiving the ArrowSchema C Data Interface address - */ - public static native void streamGetSchema(long streamPtr, org.opensearch.core.action.ActionListener listener); - - /** - * Loads the next record batch from the stream asynchronously. - * - * @param runtimePtr the native runtime pointer - * @param streamPtr the native stream pointer - * @param listener callback receiving ArrowArray C Data Interface address, or 0 if end-of-stream - */ - public static native void streamNext(long runtimePtr, long streamPtr, org.opensearch.core.action.ActionListener listener); - - /** - * Closes the native stream and releases associated resources. - * - * @param streamPtr the native stream pointer to close - */ - public static native void streamClose(long streamPtr); - - // ---- Cache management ---- - - /** - * Notifies the native cache manager that new files are available for caching. - * @param runtimePtr the native runtime pointer - * @param filePaths absolute paths of the new files - */ - public static native void cacheManagerAddFiles(long runtimePtr, String[] filePaths); - - /** - * Notifies the native cache manager that files have been deleted and should be evicted. - * @param runtimePtr the native runtime pointer - * @param filePaths absolute paths of the deleted files - */ - public static native void cacheManagerRemoveFiles(long runtimePtr, String[] filePaths); - - // ---- Test helpers ---- - - /** - * Converts a SQL query to serialized Substrait plan bytes (test only). - * Registers the table from the reader, plans the SQL, and returns the substrait bytes. - * @param readerPtr the native reader pointer - * @param tableName the table name to register - * @param sql the SQL query string - * @param runtimePtr the native runtime pointer - * @return serialized substrait plan bytes - */ - public static native byte[] sqlToSubstrait(long readerPtr, String tableName, String sql, long runtimePtr); - - /** - * Deliberately panics in native code (test only). Used to verify panic catching. - * @param message the panic message - */ - public static native void testPanic(String message); - - // ---- Logger ---- - - /** - * Initializes the Rust-to-Java logging bridge. 
- */ - public static native void initLogger(); -} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/NativeBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/NativeBridge.java new file mode 100644 index 0000000000000..de8352760eaf6 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/NativeBridge.java @@ -0,0 +1,280 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion.nativelib; + +import org.opensearch.analytics.backend.jni.NativeHandle; +import org.opensearch.core.action.ActionListener; +import org.opensearch.nativebridge.spi.NativeCall; +import org.opensearch.nativebridge.spi.NativeLibraryLoader; + +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/** + * FFM bridge to native DataFusion library. + * + *

+ * <h2>Pointer lifecycle (no Arena needed)</h2>
+ *
+ * <p>Native pointers returned by {@code createGlobalRuntime}, {@code createDatafusionReader},
+ * and {@code executeQueryAsync} are opaque {@code long} values — Rust heap addresses cast to
+ * {@code i64}. They are not {@code MemorySegment}s and do not require an Arena. They
+ * live until explicitly freed by the corresponding close method.

+ * + *
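+ * <p>For illustration, a typical pointer lifecycle looks like this (sketch only; the
+ * argument values mirror the tests in this change):
+ * <pre>{@code
+ * long runtimePtr = NativeBridge.createGlobalRuntime(128 * 1024 * 1024, 0L, spillDir, 64 * 1024 * 1024);
+ * long readerPtr = NativeBridge.createDatafusionReader(dataDir, new String[] { "test.parquet" });
+ * // ... executeQueryAsync(...) and stream operations consume the raw longs ...
+ * NativeBridge.closeDatafusionReader(readerPtr);
+ * NativeBridge.closeGlobalRuntime(runtimePtr);
+ * }</pre>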

+ * <h2>Arena usage</h2>
+ *
+ * <p>{@link NativeCall} creates a confined Arena for short-lived allocations (strings, byte
+ * arrays) that are only needed for the duration of the FFM call. The Arena is closed
+ * immediately after the call returns, freeing all temp memory.

+ * + *
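+ * <p>Every string-taking method below follows the same confined-Arena shape, roughly
+ * (sketch; {@code SOME_HANDLE} stands in for one of the {@code MethodHandle} constants
+ * declared in this class):
+ * <pre>{@code
+ * try (var call = new NativeCall()) {      // opens a confined Arena
+ *     var s = call.str(arg);               // temporary native copy of the Java string
+ *     return call.invoke(SOME_HANDLE, s.segment(), s.len());
+ * }                                        // Arena closed, temporary memory freed
+ * }</pre>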

+ * <h2>Error convention</h2>
+ *
+ * <p>Functions return {@code i64}: {@code >= 0} is success, {@code < 0} is a negated pointer
+ * to a heap-allocated error string. {@link NativeCall#invoke} reads and frees the error,
+ * then throws.
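+ * <p>Decoded, the convention looks roughly like this (a sketch of what
+ * {@link NativeCall#invoke} does internally; {@code readAndFreeNativeError} is a
+ * hypothetical helper, not part of this class):
+ * <pre>{@code
+ * long rc = (long) SOME_HANDLE.invokeExact(arg);
+ * if (rc < 0) {
+ *     long errPtr = -rc; // negate to recover the native error-string pointer
+ *     throw new RuntimeException(readAndFreeNativeError(errPtr));
+ * }
+ * return rc;
+ * }</pre>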

+ */ +public final class NativeBridge { + + private static final MethodHandle INIT_RUNTIME_MANAGER; + private static final MethodHandle SHUTDOWN_RUNTIME_MANAGER; + private static final MethodHandle CREATE_GLOBAL_RUNTIME; + private static final MethodHandle CLOSE_GLOBAL_RUNTIME; + private static final MethodHandle CREATE_READER; + private static final MethodHandle CLOSE_READER; + private static final MethodHandle EXECUTE_QUERY; + private static final MethodHandle STREAM_GET_SCHEMA; + private static final MethodHandle STREAM_NEXT; + private static final MethodHandle STREAM_CLOSE; + private static final MethodHandle SQL_TO_SUBSTRAIT; + + static { + SymbolLookup lib = NativeLibraryLoader.symbolLookup(); + Linker linker = Linker.nativeLinker(); + + INIT_RUNTIME_MANAGER = linker.downcallHandle( + lib.find("df_init_runtime_manager").orElseThrow(), + FunctionDescriptor.ofVoid(ValueLayout.JAVA_INT) + ); + + SHUTDOWN_RUNTIME_MANAGER = linker.downcallHandle( + lib.find("df_shutdown_runtime_manager").orElseThrow(), + FunctionDescriptor.ofVoid() + ); + + CREATE_GLOBAL_RUNTIME = linker.downcallHandle( + lib.find("df_create_global_runtime").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG + ) + ); + + CLOSE_GLOBAL_RUNTIME = linker.downcallHandle( + lib.find("df_close_global_runtime").orElseThrow(), + FunctionDescriptor.ofVoid(ValueLayout.JAVA_LONG) + ); + + CREATE_READER = linker.downcallHandle( + lib.find("df_create_reader").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG + ) + ); + + CLOSE_READER = linker.downcallHandle(lib.find("df_close_reader").orElseThrow(), FunctionDescriptor.ofVoid(ValueLayout.JAVA_LONG)); + + EXECUTE_QUERY = linker.downcallHandle( + lib.find("df_execute_query").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG + ) + ); + + STREAM_GET_SCHEMA = linker.downcallHandle( + lib.find("df_stream_get_schema").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.JAVA_LONG) + ); + + STREAM_NEXT = linker.downcallHandle( + lib.find("df_stream_next").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.JAVA_LONG) + ); + + STREAM_CLOSE = linker.downcallHandle(lib.find("df_stream_close").orElseThrow(), FunctionDescriptor.ofVoid(ValueLayout.JAVA_LONG)); + + // i64 df_sql_to_substrait(shard_ptr, table_ptr, table_len, sql_ptr, sql_len, runtime_ptr, out_ptr, out_cap, out_len) + SQL_TO_SUBSTRAIT = linker.downcallHandle( + lib.find("df_sql_to_substrait").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS + ) + ); + } + + private NativeBridge() {} + + // ---- Tokio runtime management (no Arena needed — no string/buffer args) ---- + + public static void initTokioRuntimeManager(int cpuThreads) { + NativeCall.invokeVoid(INIT_RUNTIME_MANAGER, cpuThreads); + } + + public static void shutdownTokioRuntimeManager() { + NativeCall.invokeVoid(SHUTDOWN_RUNTIME_MANAGER); + } + + // ---- DataFusion runtime (confined Arena for spillDir string only) 
---- + + /** + * Creates a global DataFusion runtime. Returns an opaque native pointer ({@code long}). + * This pointer is not a MemorySegment — it's a Rust heap address that lives + * until {@link #closeGlobalRuntime} is called. + */ + public static long createGlobalRuntime(long memoryLimit, long cacheManagerPtr, String spillDir, long spillLimit) { + try (var call = new NativeCall()) { + var dir = call.str(spillDir); + return call.invoke(CREATE_GLOBAL_RUNTIME, memoryLimit, dir.segment(), dir.len(), spillLimit); + } + } + + /** Frees the native runtime. Safe to call once. */ + public static void closeGlobalRuntime(long ptr) { + NativeCall.invokeVoid(CLOSE_GLOBAL_RUNTIME, ptr); + } + + // ---- Reader management (confined Arena for path + file strings) ---- + + /** + * Creates a native reader. Returns an opaque native pointer. + * Freed by {@link #closeDatafusionReader}. + */ + public static long createDatafusionReader(String path, String[] files) { + try (var call = new NativeCall()) { + var p = call.str(path); + var f = call.strArray(files); + return call.invoke(CREATE_READER, p.segment(), p.len(), f.ptrs(), f.lens(), f.count()); + } + } + + public static void closeDatafusionReader(long ptr) { + NativeCall.invokeVoid(CLOSE_READER, ptr); + } + + // ---- Query execution (confined Arena for tableName + plan bytes) ---- + + public static void executeQueryAsync( + long readerPtr, + String tableName, + byte[] substraitPlan, + long runtimePtr, + ActionListener listener + ) { + try { + NativeHandle.validatePointer(readerPtr, "reader"); + NativeHandle.validatePointer(runtimePtr, "runtime"); + } catch (Exception e) { + listener.onFailure(e); + return; + } + try (var call = new NativeCall()) { + var table = call.str(tableName); + long result = call.invoke( + EXECUTE_QUERY, + readerPtr, + table.segment(), + table.len(), + call.bytes(substraitPlan), + (long) substraitPlan.length, + runtimePtr + ); + listener.onResponse(result); + } catch (Throwable t) { + listener.onFailure(t instanceof Exception ? (Exception) t : new RuntimeException(t)); + } + } + + // ---- Stream operations (no Arena needed — only long args) ---- + + public static void streamGetSchema(long streamPtr, ActionListener listener) { + try { + NativeHandle.validatePointer(streamPtr, "stream"); + long result = NativeLibraryLoader.checkResult((long) STREAM_GET_SCHEMA.invokeExact(streamPtr)); + listener.onResponse(result); + } catch (Throwable t) { + listener.onFailure(t instanceof Exception ? (Exception) t : new RuntimeException(t)); + } + } + + public static void streamNext(long runtimePtr, long streamPtr, ActionListener listener) { + try { + NativeHandle.validatePointer(streamPtr, "stream"); + long result = NativeLibraryLoader.checkResult((long) STREAM_NEXT.invokeExact(streamPtr)); + listener.onResponse(result); + } catch (Throwable t) { + listener.onFailure(t instanceof Exception ? 
(Exception) t : new RuntimeException(t)); } } + + public static void streamClose(long streamPtr) { + NativeCall.invokeVoid(STREAM_CLOSE, streamPtr); + } + + // ---- Test helpers ---- + + public static byte[] sqlToSubstrait(long readerPtr, String tableName, String sql, long runtimePtr) { + NativeHandle.validatePointer(readerPtr, "reader"); + NativeHandle.validatePointer(runtimePtr, "runtime"); + try (var call = new NativeCall()) { + var table = call.str(tableName); + var query = call.str(sql); + var out = call.outBuffer(1024 * 1024); + call.invoke( + SQL_TO_SUBSTRAIT, + readerPtr, + table.segment(), + table.len(), + query.segment(), + query.len(), + runtimePtr, + out.data(), + (long) out.capacity(), + out.lenOut() + ); + return out.toByteArray(); + } + } + + // ---- Cache manager and logger (currently no-op stubs) ---- + + public static void cacheManagerAddFiles(long runtimePtr, String[] filePaths) {} + + public static void cacheManagerRemoveFiles(long runtimePtr, String[] filePaths) {} + + public static void initLogger() {} +} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/ReaderHandle.java similarity index 96% rename from sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java rename to sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/ReaderHandle.java index e563c21afdf6b..7496584eb1196 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/ReaderHandle.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.be.datafusion.jni; +package org.opensearch.be.datafusion.nativelib; import org.opensearch.analytics.backend.jni.NativeHandle; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/StreamHandle.java similarity index 96% rename from sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java rename to sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/StreamHandle.java index 45b8b1f621587..91525dc72eef2 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/StreamHandle.java @@ -6,7 +6,7 @@ * compatible open source license.
*/ -package org.opensearch.be.datafusion.jni; +package org.opensearch.be.datafusion.nativelib; import org.opensearch.analytics.backend.jni.NativeHandle; import org.opensearch.be.datafusion.NativeRuntimeHandle; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/package-info.java similarity index 76% rename from sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java rename to sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/package-info.java index 6a8481365c71c..efa83f5f47c72 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/nativelib/package-info.java @@ -11,9 +11,9 @@ * *

 * <p>This package provides:
 * <ul>
- *   <li>Type-safe native handle wrappers ({@link org.opensearch.be.datafusion.jni.ReaderHandle})</li>
- *   <li>Centralized native method declarations ({@link org.opensearch.be.datafusion.jni.NativeBridge})</li>
+ *   <li>Type-safe native handle wrappers ({@link org.opensearch.be.datafusion.nativelib.ReaderHandle})</li>
+ *   <li>Centralized native method declarations ({@link org.opensearch.be.datafusion.nativelib.NativeBridge})</li>
 * </ul>
* */ -package org.opensearch.be.datafusion.jni; +package org.opensearch.be.datafusion.nativelib; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionJniBridgeTests.java b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionNativeBridgeTests.java similarity index 81% rename from sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionJniBridgeTests.java rename to sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionNativeBridgeTests.java index 0e43d31264fdc..fa802dc74bb34 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionJniBridgeTests.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionNativeBridgeTests.java @@ -8,7 +8,8 @@ package org.opensearch.be.datafusion; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.nativelib.NativeBridge; +import org.opensearch.be.datafusion.nativelib.ReaderHandle; import org.opensearch.test.OpenSearchTestCase; import java.nio.file.Files; @@ -18,7 +19,7 @@ * Smoke test for the DataFusion JNI bridge. * Verifies native library loading, runtime creation, and reader lifecycle. */ -public class DataFusionJniBridgeTests extends OpenSearchTestCase { +public class DataFusionNativeBridgeTests extends OpenSearchTestCase { public void testRuntimeLifecycle() { // Init tokio runtime @@ -50,11 +51,11 @@ public void testReaderLifecycle() throws Exception { Files.copy(testParquet, dataDir.resolve("test.parquet")); // Create reader - long readerPtr = NativeBridge.createDatafusionReader(dataDir.toString(), new String[] { "test.parquet" }); - assertTrue("Reader pointer should be non-zero", readerPtr != 0); + ReaderHandle readerHandle = new ReaderHandle(dataDir.toString(), new String[] { "test.parquet" }); + assertTrue("Reader pointer should be non-zero", readerHandle.getPointer() != 0); // Close reader - NativeBridge.closeDatafusionReader(readerPtr); + readerHandle.close(); NativeBridge.closeGlobalRuntime(runtimePtr); NativeBridge.shutdownTokioRuntimeManager(); diff --git a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionQueryExecutionTests.java b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionQueryExecutionTests.java index afedac17d3710..e8fbaa1545c05 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionQueryExecutionTests.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionQueryExecutionTests.java @@ -15,7 +15,9 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Schema; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.nativelib.NativeBridge; +import org.opensearch.be.datafusion.nativelib.ReaderHandle; +import org.opensearch.be.datafusion.nativelib.StreamHandle; import org.opensearch.core.action.ActionListener; import org.opensearch.test.OpenSearchTestCase; @@ -34,31 +36,28 @@ */ public class DataFusionQueryExecutionTests extends OpenSearchTestCase { - private long runtimePtr; - private long readerPtr; - - private static boolean runtimeInitialized = false; + private NativeRuntimeHandle 
runtimeHandle; + private ReaderHandle readerHandle; @Override public void setUp() throws Exception { super.setUp(); - if (runtimeInitialized == false) { - NativeBridge.initTokioRuntimeManager(2); - runtimeInitialized = true; - } + NativeBridge.initTokioRuntimeManager(2); Path spillDir = createTempDir("datafusion-spill"); - runtimePtr = NativeBridge.createGlobalRuntime(128 * 1024 * 1024, 0L, spillDir.toString(), 64 * 1024 * 1024); + runtimeHandle = new NativeRuntimeHandle( + NativeBridge.createGlobalRuntime(128 * 1024 * 1024, 0L, spillDir.toString(), 64 * 1024 * 1024) + ); Path dataDir = createTempDir("datafusion-data"); Path testParquet = Path.of(getClass().getClassLoader().getResource("test.parquet").toURI()); Files.copy(testParquet, dataDir.resolve("test.parquet")); - readerPtr = NativeBridge.createDatafusionReader(dataDir.toString(), new String[] { "test.parquet" }); + readerHandle = new ReaderHandle(dataDir.toString(), new String[] { "test.parquet" }); } @Override public void tearDown() throws Exception { - NativeBridge.closeDatafusionReader(readerPtr); - NativeBridge.closeGlobalRuntime(runtimePtr); + readerHandle.close(); + runtimeHandle.close(); super.tearDown(); } @@ -92,29 +91,36 @@ public void testAggregationQuery() throws Exception { */ private List executeQuery(String sql) { // Step 1: SQL → Substrait (test helper) - byte[] substraitBytes = NativeBridge.sqlToSubstrait(readerPtr, "test_table", sql, runtimePtr); + byte[] substraitBytes = NativeBridge.sqlToSubstrait(readerHandle.getPointer(), "test_table", sql, runtimeHandle.get()); assertNotNull(substraitBytes); assertTrue(substraitBytes.length > 0); // Step 2: executeQueryAsync (production path) long streamPtr = asyncCall( - listener -> NativeBridge.executeQueryAsync(readerPtr, "test_table", substraitBytes, runtimePtr, listener) + listener -> NativeBridge.executeQueryAsync( + readerHandle.getPointer(), + "test_table", + substraitBytes, + runtimeHandle.get(), + listener + ) ); assertTrue(streamPtr != 0); // Step 3: Read results via Arrow C Data try ( + StreamHandle streamHandle = new StreamHandle(streamPtr, runtimeHandle); RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); CDataDictionaryProvider dictProvider = new CDataDictionaryProvider() ) { - long schemaAddr = asyncCall(listener -> NativeBridge.streamGetSchema(streamPtr, listener)); + long schemaAddr = asyncCall(listener -> NativeBridge.streamGetSchema(streamHandle.getPointer(), listener)); Schema schema = new Schema(importField(allocator, ArrowSchema.wrap(schemaAddr), dictProvider).getChildren(), null); VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); List rows = new ArrayList<>(); while (true) { - long arrayAddr = asyncCall(listener -> NativeBridge.streamNext(runtimePtr, streamPtr, listener)); + long arrayAddr = asyncCall(listener -> NativeBridge.streamNext(runtimeHandle.get(), streamHandle.getPointer(), listener)); if (arrayAddr == 0) break; Data.importIntoVectorSchemaRoot(allocator, ArrowArray.wrap(arrayAddr), root, dictProvider); int cols = root.getFieldVectors().size(); @@ -127,7 +133,6 @@ private List executeQuery(String sql) { } } root.close(); - NativeBridge.streamClose(streamPtr); return rows; } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionServiceTests.java b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionServiceTests.java index 850614b34cd46..f4b4185fe75c5 100644 --- 
a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionServiceTests.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DataFusionServiceTests.java @@ -8,7 +8,7 @@ package org.opensearch.be.datafusion; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.nativelib.NativeBridge; import org.opensearch.test.OpenSearchTestCase; import java.nio.file.Path; @@ -18,13 +18,8 @@ */ public class DataFusionServiceTests extends OpenSearchTestCase { - private static boolean runtimeInitialized = false; - private void ensureTokioInit() { - if (runtimeInitialized == false) { - NativeBridge.initTokioRuntimeManager(2); - runtimeInitialized = true; - } + NativeBridge.initTokioRuntimeManager(2); } public void testServiceStartStop() { @@ -79,11 +74,6 @@ public void testNativeRuntimeHandleRejectsZeroPointer() { expectThrows(IllegalArgumentException.class, () -> new NativeRuntimeHandle(0L)); } - public void testNativePanicIsCaughtAsException() { - RuntimeException ex = expectThrows(RuntimeException.class, () -> NativeBridge.testPanic("test panic message")); - assertTrue("Should contain panic message, got: " + ex.getMessage(), ex.getMessage().contains("test panic message")); - } - public void testCacheFileOperationsDoNotThrow() { ensureTokioInit(); Path spillDir = createTempDir("spill"); diff --git a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionResultStreamTests.java b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionResultStreamTests.java index 9c337a4e73973..e353e096be260 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionResultStreamTests.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionResultStreamTests.java @@ -11,7 +11,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.opensearch.analytics.backend.EngineResultBatch; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.nativelib.NativeBridge; +import org.opensearch.be.datafusion.nativelib.ReaderHandle; import org.opensearch.core.action.ActionListener; import org.opensearch.test.OpenSearchTestCase; @@ -27,19 +28,14 @@ */ public class DatafusionResultStreamTests extends OpenSearchTestCase { - private long readerPtr; + private ReaderHandle readerHandle; private NativeRuntimeHandle runtimeHandle; private RootAllocator testRootAllocator; - private static boolean runtimeInitialized = false; - @Override public void setUp() throws Exception { super.setUp(); - if (runtimeInitialized == false) { - NativeBridge.initTokioRuntimeManager(2); - runtimeInitialized = true; - } + NativeBridge.initTokioRuntimeManager(2); Path spillDir = createTempDir("spill"); long ptr = NativeBridge.createGlobalRuntime(128 * 1024 * 1024, 0L, spillDir.toString(), 64 * 1024 * 1024); runtimeHandle = new NativeRuntimeHandle(ptr); @@ -48,12 +44,12 @@ public void setUp() throws Exception { Path dataDir = createTempDir("data"); Path testParquet = Path.of(getClass().getClassLoader().getResource("test.parquet").toURI()); Files.copy(testParquet, dataDir.resolve("test.parquet")); - readerPtr = NativeBridge.createDatafusionReader(dataDir.toString(), new String[] { "test.parquet" }); + readerHandle = new ReaderHandle(dataDir.toString(), new String[] { 
"test.parquet" }); } @Override public void tearDown() throws Exception { - NativeBridge.closeDatafusionReader(readerPtr); + readerHandle.close(); runtimeHandle.close(); testRootAllocator.close(); super.tearDown(); @@ -142,17 +138,23 @@ public void testBatchFieldAccess() throws Exception { public void testNativeQueryFailureDoesNotLeak() { // Invalid substrait bytes should cause native failure — verify error propagates and no leak CompletableFuture future = new CompletableFuture<>(); - NativeBridge.executeQueryAsync(readerPtr, "test_table", new byte[] { 0, 1, 2 }, runtimeHandle.get(), new ActionListener<>() { - @Override - public void onResponse(Long ptr) { - future.complete(ptr); - } - - @Override - public void onFailure(Exception e) { - future.completeExceptionally(e); + NativeBridge.executeQueryAsync( + readerHandle.getPointer(), + "test_table", + new byte[] { 0, 1, 2 }, + runtimeHandle.get(), + new ActionListener<>() { + @Override + public void onResponse(Long ptr) { + future.complete(ptr); + } + + @Override + public void onFailure(Exception e) { + future.completeExceptionally(e); + } } - }); + ); Exception ex = expectThrows(Exception.class, future::join); assertNotNull("Native error should propagate", ex.getCause()); assertTrue( @@ -170,9 +172,14 @@ public void testCloseAfterNativeStreamNextFailure() throws Exception { long ptr2 = NativeBridge.createGlobalRuntime(128 * 1024 * 1024, 0L, spillDir2.toString(), 64 * 1024 * 1024); NativeRuntimeHandle tempRuntime = new NativeRuntimeHandle(ptr2); - byte[] substrait = NativeBridge.sqlToSubstrait(readerPtr, "test_table", "SELECT message FROM test_table", runtimeHandle.get()); + byte[] substrait = NativeBridge.sqlToSubstrait( + readerHandle.getPointer(), + "test_table", + "SELECT message FROM test_table", + runtimeHandle.get() + ); CompletableFuture future = new CompletableFuture<>(); - NativeBridge.executeQueryAsync(readerPtr, "test_table", substrait, tempRuntime.get(), new ActionListener<>() { + NativeBridge.executeQueryAsync(readerHandle.getPointer(), "test_table", substrait, tempRuntime.get(), new ActionListener<>() { @Override public void onResponse(Long p) { future.complete(p); @@ -186,7 +193,7 @@ public void onFailure(Exception e) { long streamPtr = future.join(); DatafusionResultStream stream = new DatafusionResultStream( - new org.opensearch.be.datafusion.jni.StreamHandle(streamPtr, tempRuntime), + new org.opensearch.be.datafusion.nativelib.StreamHandle(streamPtr, tempRuntime), testRootAllocator.newChildAllocator("test-failure", 0, Long.MAX_VALUE) ); @@ -213,9 +220,9 @@ public void testDoubleCloseIsHarmless() throws Exception { } private DatafusionResultStream createStream(String sql) { - byte[] substrait = NativeBridge.sqlToSubstrait(readerPtr, "test_table", sql, runtimeHandle.get()); + byte[] substrait = NativeBridge.sqlToSubstrait(readerHandle.getPointer(), "test_table", sql, runtimeHandle.get()); CompletableFuture future = new CompletableFuture<>(); - NativeBridge.executeQueryAsync(readerPtr, "test_table", substrait, runtimeHandle.get(), new ActionListener<>() { + NativeBridge.executeQueryAsync(readerHandle.getPointer(), "test_table", substrait, runtimeHandle.get(), new ActionListener<>() { @Override public void onResponse(Long ptr) { future.complete(ptr); @@ -228,6 +235,9 @@ public void onFailure(Exception e) { }); long streamPtr = future.join(); BufferAllocator childAllocator = testRootAllocator.newChildAllocator("test-stream", 0, Long.MAX_VALUE); - return new DatafusionResultStream(new 
org.opensearch.be.datafusion.jni.StreamHandle(streamPtr, runtimeHandle), childAllocator); + return new DatafusionResultStream( + new org.opensearch.be.datafusion.nativelib.StreamHandle(streamPtr, runtimeHandle), + childAllocator + ); } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionSearchExecEngineTests.java b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionSearchExecEngineTests.java index 8c084eebabc77..1d7a45071ab0e 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionSearchExecEngineTests.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/DatafusionSearchExecEngineTests.java @@ -8,9 +8,11 @@ package org.opensearch.be.datafusion; +import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix; import org.opensearch.analytics.backend.EngineResultBatch; import org.opensearch.analytics.backend.EngineResultStream; -import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.nativelib.NativeBridge; +import org.opensearch.be.datafusion.nativelib.ReaderHandle; import org.opensearch.test.OpenSearchTestCase; import java.nio.file.Files; @@ -24,20 +26,16 @@ * DatafusionReader → DatafusionContext → DatafusionSearchExecEngine → EngineResultStream → EngineResultBatch. * Uses sqlToSubstrait to generate plan bytes, then exercises the real plugin classes. */ +@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/21195") public class DatafusionSearchExecEngineTests extends OpenSearchTestCase { - private long readerPtr; + private ReaderHandle readerHandle; private NativeRuntimeHandle runtimeHandle; - private static boolean runtimeInitialized = false; - @Override public void setUp() throws Exception { super.setUp(); - if (runtimeInitialized == false) { - NativeBridge.initTokioRuntimeManager(2); - runtimeInitialized = true; - } + NativeBridge.initTokioRuntimeManager(2); Path spillDir = createTempDir("datafusion-spill"); long ptr = NativeBridge.createGlobalRuntime(128 * 1024 * 1024, 0L, spillDir.toString(), 64 * 1024 * 1024); runtimeHandle = new NativeRuntimeHandle(ptr); @@ -45,12 +43,12 @@ public void setUp() throws Exception { Path dataDir = createTempDir("datafusion-data"); Path testParquet = Path.of(getClass().getClassLoader().getResource("test.parquet").toURI()); Files.copy(testParquet, dataDir.resolve("test.parquet")); - readerPtr = NativeBridge.createDatafusionReader(dataDir.toString(), new String[] { "test.parquet" }); + readerHandle = new ReaderHandle(dataDir.toString(), new String[] { "test.parquet" }); } @Override public void tearDown() throws Exception { - NativeBridge.closeDatafusionReader(readerPtr); + readerHandle.close(); // NativeRuntimeHandle.close() calls closeGlobalRuntime runtimeHandle.close(); super.tearDown(); @@ -58,7 +56,7 @@ public void tearDown() throws Exception { public void testEngineExecuteSelectAll() throws Exception { byte[] substrait = NativeBridge.sqlToSubstrait( - readerPtr, + readerHandle.getPointer(), "test_table", "SELECT message, message2 FROM test_table", runtimeHandle.get() @@ -88,7 +86,7 @@ public void testEngineExecuteSelectAll() throws Exception { public void testEngineExecuteAggregation() throws Exception { byte[] substrait = NativeBridge.sqlToSubstrait( - readerPtr, + readerHandle.getPointer(), "test_table", "SELECT SUM(message) as total FROM test_table", runtimeHandle.get() @@ -114,7 +112,7 @@ 
public void testEngineExecuteAggregation() throws Exception { public void testEngineExecuteFilter() throws Exception { byte[] substrait = NativeBridge.sqlToSubstrait( - readerPtr, + readerHandle.getPointer(), "test_table", "SELECT message FROM test_table WHERE message = 3", runtimeHandle.get() @@ -140,7 +138,7 @@ public void testEngineExecuteFilter() throws Exception { private DatafusionReader createReader() { // Wrap the raw pointer in a ReaderHandle via the existing native pointer - return new DatafusionReader(readerPtr); + return new DatafusionReader(readerHandle.getPointer()); } private List collectRows(EngineResultStream stream) { diff --git a/sandbox/plugins/parquet-data-format/benchmarks/build.gradle b/sandbox/plugins/parquet-data-format/benchmarks/build.gradle index f7d4eda6e983e..ee90cb6d2301b 100644 --- a/sandbox/plugins/parquet-data-format/benchmarks/build.gradle +++ b/sandbox/plugins/parquet-data-format/benchmarks/build.gradle @@ -9,6 +9,8 @@ apply plugin: 'opensearch.build' apply plugin: 'application' +java { sourceCompatibility = JavaVersion.toVersion(25); targetCompatibility = JavaVersion.toVersion(25) } + assemble.enabled = false application { diff --git a/sandbox/plugins/parquet-data-format/build.gradle b/sandbox/plugins/parquet-data-format/build.gradle index 7b54f54f50e98..f00bc5c04af09 100644 --- a/sandbox/plugins/parquet-data-format/build.gradle +++ b/sandbox/plugins/parquet-data-format/build.gradle @@ -11,11 +11,11 @@ opensearchplugin { classname = 'org.opensearch.parquet.ParquetDataFormatPlugin' } -def buildType = project.hasProperty('rustDebug') ? 'debug' : 'release' +java { sourceCompatibility = JavaVersion.toVersion(25); targetCompatibility = JavaVersion.toVersion(25) } dependencies { - // Shared native bridge lib - implementation project(':sandbox:libs:native-bridge-spi') + // Shared native bridge lib (provides the unified .so and FFM SymbolLookup) + implementation project(':sandbox:libs:dataformat-native') // Apache Arrow dependencies implementation "org.apache.arrow:arrow-vector:${versions.arrow}" @@ -48,133 +48,20 @@ tasks.named('thirdPartyAudit').configure { ) } -// --- Rust native library build tasks --- - -def rustDir = file("${projectDir}/src/main/rust") - -def isCargoInstalled() { - def possiblePaths = [ - System.getenv('HOME') + '/.cargo/bin/cargo', - '/usr/local/bin/cargo' - ] - for (String path : possiblePaths) { - if (new File(path).exists()) { - return true - } - } - try { - def process = 'which cargo'.execute() - process.waitFor() - return process.exitValue() == 0 - } catch (Exception e) { - return false - } -} - -// Resolves cargo path at execution time (after installRust may have run) -def findCargo() { - def possiblePaths = [ - System.getenv('HOME') + '/.cargo/bin/cargo', - '/usr/local/bin/cargo', - 'cargo' - ] - for (String path : possiblePaths) { - if (new File(path).exists()) { - return path - } - } - return 'cargo' -} - -tasks.register('installRust', Exec) { - description = 'Install Rust toolchain via rustup if cargo is not available' - group = 'build' - onlyIf { !isCargoInstalled() } - commandLine 'sh', '-c', 'curl --proto \'=https\' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y' -} - -tasks.register('buildRust', Exec) { - description = 'Build the Rust JNI library using Cargo' - group = 'build' - dependsOn installRust - workingDir = rustDir - environment 'CARGO_TARGET_DIR', file("${rustDir}/target").absolutePath - - inputs.files fileTree("${rustDir}/src") - inputs.file "${rustDir}/Cargo.toml" - outputs.dir "${rustDir}/target/${buildType}" 
- - doFirst { - def cargoArgs = [findCargo(), 'build'] - if (buildType == 'release') { - cargoArgs.add('--release') - } - executable cargoArgs[0] - args cargoArgs.drop(1) - } -} - -tasks.register('copyNativeLib', Copy) { - dependsOn buildRust - from "${rustDir}/target/${buildType}" - into "src/main/resources/native" - include "libparquet_dataformat_jni.dylib" - include "libparquet_dataformat_jni.so" - include "parquet_dataformat_jni.dll" - duplicatesStrategy = DuplicatesStrategy.EXCLUDE - // Normalize permissions to non-executable (0644) so that the filepermissions precommit check does not flag .so files - filePermissions { - unix("0644") - } - - eachFile { file -> - def os = System.getProperty('os.name').toLowerCase() - def arch = System.getProperty('os.arch').toLowerCase() - def osDir = os.contains('win') ? 'windows' : os.contains('mac') ? 'macos' : 'linux' - def archDir = (arch.contains('aarch64') || arch.contains('arm64')) ? 'aarch64' : 'x86_64' - file.path = "${osDir}-${archDir}/${file.name}" - } -} - -tasks.register('runNativeUnitTests', Exec) { - description = 'Run Rust unit and integration tests' - group = 'verification' - dependsOn installRust - workingDir = rustDir - doFirst { - executable findCargo() - args 'test' - } -} - -// Wire Rust build into Java build lifecycle -compileJava.dependsOn copyNativeLib -processResources.dependsOn copyNativeLib - -// Ensure tasks that scan or package src/main/resources run after native lib is copied -['filepermissions', 'forbiddenPatterns', 'spotlessJava', 'spotlessJavaCheck', 'spotlessMisc', 'spotlessMiscCheck', 'sourcesJar'].each { taskName -> - tasks.named(taskName).configure { - dependsOn copyNativeLib - } -} -tasks.named('forbiddenPatterns').configure { - exclude '**/native/**' -} - spotless { format 'misc', { targetExclude '**/src/main/rust/target/**' } } -clean { - doFirst { - delete "src/main/resources/native" - delete "${rustDir}/target" - } -} - test { jvmArgs '--add-opens=java.base/java.nio=ALL-UNNAMED' jvmArgs '--add-opens=java.base/sun.nio.ch=ALL-UNNAMED' + jvmArgs '--enable-native-access=ALL-UNNAMED' + systemProperty 'native.lib.path', project(':sandbox:libs:dataformat-native').ext.nativeLibPath.absolutePath + dependsOn ':sandbox:libs:dataformat-native:buildRustLibrary' +} + +tasks.matching { it.name == 'missingJavadoc' }.configureEach { + enabled = false } diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java index 24e6d5cbbc7c3..d8cbf1b6c8b74 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java @@ -8,58 +8,154 @@ package org.opensearch.parquet.bridge; -import org.opensearch.nativebridge.spi.PlatformHelper; +import org.opensearch.nativebridge.spi.NativeCall; +import org.opensearch.nativebridge.spi.NativeLibraryLoader; import java.io.IOException; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.nio.charset.StandardCharsets; -/** - * JNI bridge to the native Rust Parquet writer library ({@code parquet_dataformat_jni}). - * - *

Provides static native methods that operate on Arrow C Data Interface memory addresses. - * The native library is loaded from the classpath resource at - * {@code /native/{os}-{arch}/libparquet_dataformat_jni.{so|dylib|dll}}, falling back to - * {@link System#loadLibrary(String)} if the resource is not found. - * - *

Writer lifecycle methods are package-private and should only be called through - * {@link NativeParquetWriter}. - */ public class RustBridge { - private static final String LIB_NAME = "parquet_dataformat_jni"; + private static final MethodHandle CREATE_WRITER; + private static final MethodHandle WRITE; + private static final MethodHandle FINALIZE_WRITER; + private static final MethodHandle SYNC_TO_DISK; + private static final MethodHandle GET_FILE_METADATA; + private static final MethodHandle GET_FILTERED_BYTES; static { - PlatformHelper.loadNativeLibrary(LIB_NAME, RustBridge.class); + SymbolLookup lib = NativeLibraryLoader.symbolLookup(); + Linker linker = Linker.nativeLinker(); + CREATE_WRITER = linker.downcallHandle( + lib.find("parquet_create_writer").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG, ValueLayout.JAVA_LONG) + ); + WRITE = linker.downcallHandle( + lib.find("parquet_write").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_LONG + ) + ); + FINALIZE_WRITER = linker.downcallHandle( + lib.find("parquet_finalize_writer").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS + ) + ); + SYNC_TO_DISK = linker.downcallHandle( + lib.find("parquet_sync_to_disk").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG) + ); + GET_FILE_METADATA = linker.downcallHandle( + lib.find("parquet_get_file_metadata").orElseThrow(), + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS + ) + ); + GET_FILTERED_BYTES = linker.downcallHandle( + lib.find("parquet_get_filtered_native_bytes_used").orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_LONG, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG) + ); } - /** Initializes the native Rust logger. 
*/ - public static native void initLogger(); + public static void initLogger() {} - // Writer lifecycle methods — package-private, controlled by NativeParquetWriter - static native void createWriter(String file, long schemaAddress) throws IOException; + static void createWriter(String file, long schemaAddress) throws IOException { + try (var call = new NativeCall()) { + var f = call.str(file); + call.invokeIO(CREATE_WRITER, f.segment(), f.len(), schemaAddress); + } + } - static native void write(String file, long arrayAddress, long schemaAddress) throws IOException; + static void write(String file, long arrayAddress, long schemaAddress) throws IOException { + try (var call = new NativeCall()) { + var f = call.str(file); + call.invokeIO(WRITE, f.segment(), f.len(), arrayAddress, schemaAddress); + } + } - static native ParquetFileMetadata finalizeWriter(String file) throws IOException; + static ParquetFileMetadata finalizeWriter(String file) throws IOException { + try (var call = new NativeCall()) { + var f = call.str(file); + var versionOut = call.intOut(); + var numRowsOut = call.longOut(); + var out = call.outBuffer(1024); + long rc = call.invokeIO( + FINALIZE_WRITER, + f.segment(), + f.len(), + versionOut, + numRowsOut, + out.data(), + (long) out.capacity(), + out.lenOut() + ); + if (rc == 1) return null; + int createdByLen = out.actualLength(); + return new ParquetFileMetadata( + versionOut.get(ValueLayout.JAVA_INT, 0), + numRowsOut.get(ValueLayout.JAVA_LONG, 0), + createdByLen >= 0 + ? new String(out.data().asSlice(0, createdByLen).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8) + : null + ); + } + } - static native void syncToDisk(String file) throws IOException; + static void syncToDisk(String file) throws IOException { + try (var call = new NativeCall()) { + var f = call.str(file); + call.invokeIO(SYNC_TO_DISK, f.segment(), f.len()); + } + } - // Public utility methods - /** - * Returns metadata for the specified Parquet file. - * - * @param file the path to the Parquet file - * @return the file metadata - * @throws IOException if the metadata cannot be read - */ - public static native ParquetFileMetadata getFileMetadata(String file) throws IOException; + public static ParquetFileMetadata getFileMetadata(String file) throws IOException { + try (var call = new NativeCall()) { + var f = call.str(file); + var versionOut = call.intOut(); + var numRowsOut = call.longOut(); + var out = call.outBuffer(1024); + call.invokeIO(GET_FILE_METADATA, f.segment(), f.len(), versionOut, numRowsOut, out.data(), (long) out.capacity(), out.lenOut()); + int createdByLen = out.actualLength(); + return new ParquetFileMetadata( + versionOut.get(ValueLayout.JAVA_INT, 0), + numRowsOut.get(ValueLayout.JAVA_LONG, 0), + createdByLen >= 0 + ? new String(out.data().asSlice(0, createdByLen).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8) + : null + ); + } + } - /** - * Returns the native memory bytes used by files matching the given path prefix. 
- * - * @param pathPrefix the path prefix to filter by - * @return the number of native bytes used - */ - public static native long getFilteredNativeBytesUsed(String pathPrefix); + public static long getFilteredNativeBytesUsed(String pathPrefix) { + try (var call = new NativeCall()) { + var p = call.str(pathPrefix); + return call.invoke(GET_FILTERED_BYTES, p.segment(), p.len()); + } + } private RustBridge() {} } diff --git a/sandbox/plugins/parquet-data-format/src/main/rust/Cargo.toml b/sandbox/plugins/parquet-data-format/src/main/rust/Cargo.toml index 62aeefb05ef27..1e867a6f76e3c 100644 --- a/sandbox/plugins/parquet-data-format/src/main/rust/Cargo.toml +++ b/sandbox/plugins/parquet-data-format/src/main/rust/Cargo.toml @@ -1,38 +1,30 @@ [package] -name = "parquet-dataformat-jni" +name = "opensearch-parquet-format" version = "0.1.0" edition = "2021" +license = "Apache-2.0" +workspace = "../../../../../libs/dataformat-native/rust" [features] test-utils = [] [lib] -name = "parquet_dataformat_jni" -crate-type = ["cdylib", "lib"] +name = "opensearch_parquet_format" +crate-type = ["rlib"] [dependencies] -arrow = { version = "54.0.0", features = ["ffi"] } -arrow-array = "54.0.0" -arrow-schema = "54.0.0" -arrow-buffer = "54.0.0" -jni = "0.21" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -anyhow = "1.0" -thiserror = "1.0" -log = "0.4" -parquet = "54.0.0" -lazy_static = "1.4.0" -dashmap = "6.1" -chrono = "0.4" -mimalloc = { version = "0.1.48", default-features = false } -tempfile = "3.0" - -native-bridge-spi = { path = "../../../../../libs/native-bridge-spi/src/main/rust" } +arrow = { workspace = true } +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +arrow-buffer = { workspace = true } +log = { workspace = true } +parquet = { workspace = true } +lazy_static = { workspace = true } +dashmap = { workspace = true } +chrono = { workspace = true } +mimalloc = { workspace = true } +tempfile = { workspace = true } +native-bridge-common = { workspace = true } [dev-dependencies] -parquet_dataformat_jni = { path = ".", package = "parquet-dataformat-jni", features = ["test-utils"] } - -[profile.release] -debug = "line-tables-only" -strip = false +opensearch-parquet-format = { path = ".", features = ["test-utils"] } diff --git a/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs b/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs new file mode 100644 index 0000000000000..a1b2c4856e75a --- /dev/null +++ b/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +//! FFM bridge for the Parquet writer. +//! +//! Return convention: `>= 0` success, `< 0` error pointer (negate to get ptr, +//! call `native_error_message`/`native_error_free`). 
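+//!
+//! For illustration (sketch only; `file` is a `&str` path whose bytes the Java side
+//! has already copied into native memory):
+//!
+//! ```ignore
+//! let rc = unsafe { parquet_sync_to_disk(file.as_ptr(), file.len() as i64) };
+//! if rc < 0 {
+//!     // negate `rc` to recover the error pointer, then read and free it via
+//!     // `native_error_message` / `native_error_free`
+//! }
+//! ```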
+ +use std::slice; +use std::str; + +use native_bridge_common::ffm_safe; + +use crate::writer::NativeParquetWriter; + +unsafe fn str_from_raw<'a>(ptr: *const u8, len: i64) -> Result<&'a str, String> { + if ptr.is_null() { + return Err("null string pointer".to_string()); + } + if len < 0 { + return Err(format!("negative string length: {}", len)); + } + let bytes = slice::from_raw_parts(ptr, len as usize); + str::from_utf8(bytes).map_err(|e| format!("invalid UTF-8: {}", e)) +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn parquet_create_writer( + file_ptr: *const u8, + file_len: i64, + schema_address: i64, +) -> i64 { + let filename = str_from_raw(file_ptr, file_len).map_err(|e| format!("parquet_create_writer: {}", e))?.to_string(); + NativeParquetWriter::create_writer(filename, schema_address) + .map(|_| 0) + .map_err(|e| e.to_string()) +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn parquet_write( + file_ptr: *const u8, + file_len: i64, + array_address: i64, + schema_address: i64, +) -> i64 { + let filename = str_from_raw(file_ptr, file_len).map_err(|e| format!("parquet_write: {}", e))?.to_string(); + NativeParquetWriter::write_data(filename, array_address, schema_address) + .map(|_| 0) + .map_err(|e| e.to_string()) +} + +/// Returns 0 with metadata in out-pointers, 1 if no writer found. +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn parquet_finalize_writer( + file_ptr: *const u8, + file_len: i64, + version_out: *mut i32, + num_rows_out: *mut i64, + created_by_buf: *mut u8, + created_by_buf_len: i64, + created_by_len_out: *mut i64, +) -> i64 { + let filename = str_from_raw(file_ptr, file_len).map_err(|e| format!("parquet_finalize_writer: {}", e))?.to_string(); + match NativeParquetWriter::finalize_writer(filename) { + Ok(Some(metadata)) => { + if !version_out.is_null() { *version_out = metadata.version; } + if !num_rows_out.is_null() { *num_rows_out = metadata.num_rows; } + if let Some(ref cb) = metadata.created_by { + if !created_by_buf.is_null() && created_by_buf_len > 0 { + let bytes = cb.as_bytes(); + let n = bytes.len().min(created_by_buf_len as usize); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), created_by_buf, n); + if !created_by_len_out.is_null() { *created_by_len_out = n as i64; } + } + } else if !created_by_len_out.is_null() { + *created_by_len_out = -1; + } + Ok(0) + } + Ok(None) => Ok(1), + Err(e) => Err(e.to_string()), + } +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn parquet_sync_to_disk( + file_ptr: *const u8, + file_len: i64, +) -> i64 { + let filename = str_from_raw(file_ptr, file_len).map_err(|e| format!("parquet_sync_to_disk: {}", e))?.to_string(); + NativeParquetWriter::sync_to_disk(filename) + .map(|_| 0) + .map_err(|e| e.to_string()) +} + +#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn parquet_get_file_metadata( + file_ptr: *const u8, + file_len: i64, + version_out: *mut i32, + num_rows_out: *mut i64, + created_by_buf: *mut u8, + created_by_buf_len: i64, + created_by_len_out: *mut i64, +) -> i64 { + let filename = str_from_raw(file_ptr, file_len).map_err(|e| format!("parquet_get_file_metadata: {}", e))?.to_string(); + let fm = NativeParquetWriter::get_file_metadata(filename).map_err(|e| e.to_string())?; + if !version_out.is_null() { *version_out = fm.version(); } + if !num_rows_out.is_null() { *num_rows_out = fm.num_rows(); } + if let Some(cb) = fm.created_by() { + if !created_by_buf.is_null() && created_by_buf_len > 0 { + let bytes = cb.as_bytes(); + let n = bytes.len().min(created_by_buf_len as usize); + 
std::ptr::copy_nonoverlapping(bytes.as_ptr(), created_by_buf, n); + if !created_by_len_out.is_null() { *created_by_len_out = n as i64; } + } + } else if !created_by_len_out.is_null() { + *created_by_len_out = -1; + } + Ok(0) +} + +#[no_mangle] +pub unsafe extern "C" fn parquet_get_filtered_native_bytes_used( + prefix_ptr: *const u8, + prefix_len: i64, +) -> i64 { + let prefix = str_from_raw(prefix_ptr, prefix_len).unwrap_or("").to_string(); + NativeParquetWriter::get_filtered_writer_memory_usage(prefix).unwrap_or(0) as i64 +} diff --git a/sandbox/plugins/parquet-data-format/src/main/rust/src/jni.rs b/sandbox/plugins/parquet-data-format/src/main/rust/src/jni.rs deleted file mode 100644 index ddfcbd7aec757..0000000000000 --- a/sandbox/plugins/parquet-data-format/src/main/rust/src/jni.rs +++ /dev/null @@ -1,163 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -use jni::objects::{JClass, JObject, JString}; -use jni::sys::{jlong, jobject}; -use jni::JNIEnv; -use parquet::format::FileMetaData as FormatFileMetaData; - -use crate::log_error; -use crate::writer::NativeParquetWriter; - -#[unsafe(no_mangle)] -pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_initLogger( - env: JNIEnv, - _class: JClass, -) { - native_bridge_spi::init_logger_from_env(&env); -} - -#[unsafe(no_mangle)] -pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_createWriter( - mut env: JNIEnv, - _class: JClass, - file: JString, - schema_address: jlong, -) { - let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); - if let Err(e) = NativeParquetWriter::create_writer(filename, schema_address as i64) { - log_error!("ERROR: Failed to create writer: {:?}", e); - let _ = env.throw_new("java/io/IOException", &format!("Failed to create writer: {}", e)); - } -} - -#[unsafe(no_mangle)] -pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_write( - mut env: JNIEnv, - _class: JClass, - file: JString, - array_address: jlong, - schema_address: jlong, -) { - let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); - if let Err(e) = NativeParquetWriter::write_data(filename, array_address as i64, schema_address as i64) { - log_error!("ERROR: Failed to write data: {:?}", e); - let _ = env.throw_new("java/io/IOException", &format!("Failed to write data: {}", e)); - } -} - -#[unsafe(no_mangle)] -pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_finalizeWriter( - mut env: JNIEnv, - _class: JClass, - file: JString, -) -> jobject { - let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); - match NativeParquetWriter::finalize_writer(filename) { - Ok(Some(metadata)) => { - match create_java_metadata(&mut env, &metadata) { - Ok(java_obj) => java_obj.into_raw(), - Err(e) => { - log_error!("ERROR: Failed to create Java metadata object: {:?}", e); - let _ = env.throw_new("java/io/IOException", "Failed to create metadata object"); - JObject::null().into_raw() - } - } - } - Ok(None) => JObject::null().into_raw(), - Err(e) => { - log_error!("ERROR: Failed to finalize writer: {:?}", e); - let _ = env.throw_new("java/io/IOException", &format!("Failed to finalize writer: {}", e)); - JObject::null().into_raw() - } - } -} - -#[unsafe(no_mangle)] -pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_syncToDisk( - 
mut env: JNIEnv,
-    _class: JClass,
-    file: JString,
-) {
-    let filename: String = env.get_string(&file).expect("Couldn't get java string!").into();
-    if let Err(e) = NativeParquetWriter::sync_to_disk(filename) {
-        log_error!("ERROR: Failed to sync to disk: {:?}", e);
-        let _ = env.throw_new("java/io/IOException", &format!("Failed to sync to disk: {}", e));
-    }
-}
-
-#[unsafe(no_mangle)]
-pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_getFilteredNativeBytesUsed(
-    mut env: JNIEnv,
-    _class: JClass,
-    path_prefix: JString,
-) -> jlong {
-    let prefix: String = env.get_string(&path_prefix).expect("Couldn't get java string!").into();
-    match NativeParquetWriter::get_filtered_writer_memory_usage(prefix) {
-        Ok(memory) => memory as jlong,
-        Err(e) => {
-            log_error!("ERROR: Failed to get filtered native bytes used: {:?}", e);
-            0
-        }
-    }
-}
-
-#[unsafe(no_mangle)]
-pub extern "system" fn Java_org_opensearch_parquet_bridge_RustBridge_getFileMetadata(
-    mut env: JNIEnv,
-    _class: JClass,
-    file: JString,
-) -> jobject {
-    let filename: String = env.get_string(&file).expect("Couldn't get java string!").into();
-    match NativeParquetWriter::get_file_metadata(filename) {
-        Ok(file_metadata) => {
-            let format_metadata = FormatFileMetaData {
-                version: file_metadata.version(),
-                num_rows: file_metadata.num_rows(),
-                created_by: file_metadata.created_by().map(|s| s.to_string()),
-                schema: vec![],
-                row_groups: vec![],
-                key_value_metadata: None,
-                encryption_algorithm: None,
-                footer_signing_key_metadata: None,
-                column_orders: None,
-            };
-            match create_java_metadata(&mut env, &format_metadata) {
-                Ok(java_obj) => java_obj.into_raw(),
-                Err(e) => {
-                    log_error!("ERROR: Failed to create Java metadata: {:?}", e);
-                    let _ = env.throw_new("java/io/IOException", "Failed to create metadata object");
-                    JObject::null().into_raw()
-                }
-            }
-        }
-        Err(e) => {
-            log_error!("ERROR: Failed to get file metadata: {:?}", e);
-            let _ = env.throw_new("java/io/IOException", &format!("Failed to get metadata: {}", e));
-            JObject::null().into_raw()
-        }
-    }
-}
-
-fn create_java_metadata<'local>(env: &mut JNIEnv<'local>, metadata: &FormatFileMetaData) -> Result<JObject<'local>, Box<dyn std::error::Error>> {
-    let class = env.find_class("org/opensearch/parquet/bridge/ParquetFileMetadata")?;
-    let created_by_jstring = match &metadata.created_by {
-        Some(created_by) => env.new_string(created_by)?,
-        None => JObject::null().into(),
-    };
-    let java_metadata = env.new_object(
-        &class,
-        "(IJLjava/lang/String;)V",
-        &[
-            (metadata.version).into(),
-            (metadata.num_rows).into(),
-            (&created_by_jstring).into(),
-        ],
-    )?;
-    Ok(java_metadata)
-}
diff --git a/sandbox/plugins/parquet-data-format/src/main/rust/src/lib.rs b/sandbox/plugins/parquet-data-format/src/main/rust/src/lib.rs
index 4605e0b1cf452..c13fd3e8b5f10 100644
--- a/sandbox/plugins/parquet-data-format/src/main/rust/src/lib.rs
+++ b/sandbox/plugins/parquet-data-format/src/main/rust/src/lib.rs
@@ -6,11 +6,6 @@
  * compatible open source license.
 */

-use mimalloc::MiMalloc;
-
-#[global_allocator]
-static GLOBAL: MiMalloc = MiMalloc;
-
 #[cfg(any(test, feature = "test-utils"))]
 pub mod test_utils;

@@ -18,6 +13,6 @@ pub mod test_utils;
 mod tests;

 pub mod writer;
-mod jni;
+pub mod ffm;

-pub use native_bridge_spi::{log_info, log_error, log_debug};
+pub use native_bridge_common::{log_info, log_error, log_debug};
diff --git a/sandbox/plugins/parquet-data-format/src/main/rust/src/writer.rs b/sandbox/plugins/parquet-data-format/src/main/rust/src/writer.rs
index 36b6a2ba8493c..ceb28a0fa0464 100644
--- a/sandbox/plugins/parquet-data-format/src/main/rust/src/writer.rs
+++ b/sandbox/plugins/parquet-data-format/src/main/rust/src/writer.rs
@@ -14,6 +14,7 @@ use parquet::arrow::ArrowWriter;
 use parquet::basic::Compression;
 use parquet::file::properties::WriterProperties;
 use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::file::metadata::ParquetMetaData;
 use parquet::format::FileMetaData as FormatFileMetaData;
 use std::fs::File;
 use std::sync::{Arc, Mutex};
@@ -100,9 +101,21 @@ impl NativeParquetWriter {
         match Arc::try_unwrap(writer_arc) {
             Ok(mutex) => {
                 let writer = mutex.into_inner().unwrap();
-                let file_metadata = writer.close()?;
-                log_debug!("Successfully closed writer for file: {}, num_rows={}", filename, file_metadata.num_rows);
-                Ok(Some(file_metadata))
+                let parquet_metadata = writer.close()?;
+                let file_metadata = parquet_metadata.file_metadata();
+                log_debug!("Successfully closed writer for file: {}, num_rows={}", filename, file_metadata.num_rows());
+                let format_metadata = FormatFileMetaData {
+                    version: file_metadata.version(),
+                    num_rows: file_metadata.num_rows(),
+                    created_by: file_metadata.created_by().map(|s| s.to_string()),
+                    schema: vec![],
+                    row_groups: vec![],
+                    key_value_metadata: None,
+                    encryption_algorithm: None,
+                    footer_signing_key_metadata: None,
+                    column_orders: None,
+                };
+                Ok(Some(format_metadata))
             }
             Err(_) => {
                 log_error!("ERROR: Writer still in use for file: {}", filename);
diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkItemRequest.java b/server/src/main/java/org/opensearch/action/bulk/BulkItemRequest.java
index b0ba44c308b95..827632bcbdf87 100644
--- a/server/src/main/java/org/opensearch/action/bulk/BulkItemRequest.java
+++ b/server/src/main/java/org/opensearch/action/bulk/BulkItemRequest.java
@@ -36,56 +36,43 @@
 import org.apache.lucene.util.RamUsageEstimator;
 import org.opensearch.action.DocWriteRequest;
 import org.opensearch.common.Nullable;
-import org.opensearch.core.common.Strings;
 import org.opensearch.core.common.io.stream.StreamInput;
 import org.opensearch.core.common.io.stream.StreamOutput;
 import org.opensearch.core.common.io.stream.Writeable;
 import org.opensearch.core.index.shard.ShardId;
-import org.opensearch.core.xcontent.MediaTypeRegistry;

 import java.io.IOException;
-import java.util.Objects;

 /**
  * Transport request for a Single bulk item
  *
  * @opensearch.internal
  */
-public class BulkItemRequest implements Writeable, Accountable {
+public record BulkItemRequest(int id, DocWriteRequest<?> request, BulkItemResponse primaryResponse) implements Writeable, Accountable {

     private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(BulkItemRequest.class);

-    private int id;
-    private DocWriteRequest request;
-    private volatile BulkItemResponse primaryResponse;
-
     /**
      * @param shardId the shard id
      */
     BulkItemRequest(@Nullable ShardId shardId, StreamInput in) throws IOException {
-        id = in.readVInt();
-        request = DocWriteRequest.readDocumentRequest(shardId, in);
+        this(in.readVInt(), DocWriteRequest.readDocumentRequest(shardId, in), readPrimaryResponse(shardId, in));
+    }
+
+    private static BulkItemResponse readPrimaryResponse(ShardId shardId, StreamInput in) throws IOException {
         if (in.readBoolean()) {
             if (shardId == null) {
-                primaryResponse = new BulkItemResponse(in);
+                return new BulkItemResponse(in);
             } else {
-                primaryResponse = new BulkItemResponse(shardId, in);
+                return new BulkItemResponse(shardId, in);
             }
         }
+        return null;
     }

     // NOTE: public for testing only
     public BulkItemRequest(int id, DocWriteRequest<?> request) {
-        this.id = id;
-        this.request = request;
-    }
-
-    public int id() {
-        return id;
-    }
-
-    public DocWriteRequest request() {
-        return request;
+        this(id, request, null);
     }

     public String index() {
@@ -93,42 +80,6 @@ public String index() {
         return request.indices()[0];
     }

-    BulkItemResponse getPrimaryResponse() {
-        return primaryResponse;
-    }
-
-    void setPrimaryResponse(BulkItemResponse primaryResponse) {
-        this.primaryResponse = primaryResponse;
-    }
-
-    /**
-     * Abort this request, and store a {@link org.opensearch.action.bulk.BulkItemResponse.Failure} response.
-     *
-     * @param index The concrete index that was resolved for this request
-     * @param cause The cause of the rejection (may not be null)
-     * @throws IllegalStateException If a response already exists for this request
-     */
-    public void abort(String index, Exception cause) {
-        if (primaryResponse == null) {
-            final BulkItemResponse.Failure failure = new BulkItemResponse.Failure(index, request.id(), Objects.requireNonNull(cause), true);
-            setPrimaryResponse(new BulkItemResponse(id, request.opType(), failure));
-        } else {
-            assert primaryResponse.isFailed() && primaryResponse.getFailure().isAborted() : "response ["
-                + Strings.toString(MediaTypeRegistry.JSON, primaryResponse)
-                + "]; cause ["
-                + cause
-                + "]";
-            if (primaryResponse.isFailed() && primaryResponse.getFailure().isAborted()) {
-                primaryResponse.getFailure().getCause().addSuppressed(cause);
-            } else {
-                throw new IllegalStateException(
-                    "aborting item that with response [" + primaryResponse + "] that was previously processed",
-                    cause
-                );
-            }
-        }
-    }
-
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeVInt(id);
@@ -139,8 +90,6 @@ public void writeTo(StreamOutput out) throws IOException {
     public void writeThin(StreamOutput out) throws IOException {
         out.writeVInt(id);
         DocWriteRequest.writeDocumentRequestThin(out, request);
-
-        BulkItemResponse primaryResponse = this.primaryResponse; // Read volatile once
         out.writeOptionalWriteable((o, resp) -> resp.writeThin(o), primaryResponse);
     }
diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkItemResponse.java b/server/src/main/java/org/opensearch/action/bulk/BulkItemResponse.java
index e43fb2854be91..1334afbabf0be 100644
--- a/server/src/main/java/org/opensearch/action/bulk/BulkItemResponse.java
+++ b/server/src/main/java/org/opensearch/action/bulk/BulkItemResponse.java
@@ -421,7 +421,6 @@ public long getTerm() {
     /**
      * Whether this failure is the result of an abort.
      * If {@code true}, the request to which this failure relates should never be retried, regardless of the {@link #getCause() cause}.
-     * @see BulkItemRequest#abort(String, Exception)
      */
     public boolean isAborted() {
         return aborted;
diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java b/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java
index fcdf0bc8a4bb1..42711b7f440d4 100644
--- a/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java
+++ b/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java
@@ -45,9 +45,6 @@
 import java.util.Arrays;

-import static org.opensearch.action.bulk.BulkShardResponse.DEFAULT_QUEUE_SIZE;
-import static org.opensearch.action.bulk.BulkShardResponse.DEFAULT_SERVICE_TIME_IN_NANOS;
-
 /**
  * This is a utility class that holds the per request state needed to perform bulk operations on the primary.
  * More specifically, it maintains an index to the current executing bulk item, which allows execution
@@ -91,19 +88,21 @@ enum ItemProcessingState {

     private int currentIndex = -1;
     private ItemProcessingState currentItemState;
-    private DocWriteRequest requestToExecute;
+    private DocWriteRequest<?> requestToExecute;
     private BulkItemResponse executionResult;
     private int retryCounter;
+    private final BulkItemResponse[] primaryResponses;

     BulkPrimaryExecutionContext(BulkShardRequest request, IndexShard primary) {
         this.request = request;
         this.primary = primary;
+        this.primaryResponses = new BulkItemResponse[request.items().length];
         advance();
     }

     private int findNextNonAborted(int startIndex) {
         final int length = request.items().length;
-        while (startIndex < length && isAborted(request.items()[startIndex].getPrimaryResponse())) {
+        while (startIndex < length && isAborted(request.items()[startIndex].primaryResponse())) {
             startIndex++;
         }
         return startIndex;
@@ -131,7 +130,7 @@ public DocWriteRequest getCurrent() {
     }

     public BulkShardRequest getBulkShardRequest() {
-        return request;
+        return request.setPrimaryResponses(primaryResponses);
     }

     /** returns the result of the request that has been executed on the shard */
@@ -145,21 +144,11 @@ public int getRetryCounter() {
         return retryCounter;
     }

-    /** returns true if the current request has been executed on the primary */
-    public boolean isOperationExecuted() {
-        return currentItemState == ItemProcessingState.EXECUTED;
-    }
-
     /** returns true if the request needs to wait for a mapping update to arrive from the cluster-manager */
     public boolean requiresWaitingForMappingUpdate() {
         return currentItemState == ItemProcessingState.WAIT_FOR_MAPPING_UPDATE;
     }

-    /** returns true if the current request should be retried without waiting for an external event */
-    public boolean requiresImmediateRetry() {
-        return currentItemState == ItemProcessingState.IMMEDIATE_RETRY;
-    }
-
     /**
      * returns true if the current request has been completed and its result translated to a user
      * facing response
@@ -206,10 +195,10 @@ public IndexShard getPrimary() {
     }

     /**
-     * sets the request that should actually be executed on the primary. This can be different then the request
+     * sets the request that should actually be executed on the primary. This can be different from the request
      * received from the user (specifically, an update request is translated to an indexing or delete request).
      */
-    public void setRequestToExecute(DocWriteRequest writeRequest) {
+    public void setRequestToExecute(DocWriteRequest<?> writeRequest) {
         assert assertInvariants(ItemProcessingState.INITIAL);
         requestToExecute = writeRequest;
         currentItemState = ItemProcessingState.TRANSLATED;
@@ -217,6 +206,7 @@ public void setRequestToExecute(DocWriteRequest writeRequest) {
     }

     /** returns the request that should be executed on the shard. */
+    @SuppressWarnings("unchecked")
     public <T extends DocWriteRequest<?>> T getRequestToExecute() {
         assert assertInvariants(ItemProcessingState.TRANSLATED);
         return (T) requestToExecute;
@@ -252,7 +242,7 @@ public void markOperationAsNoOp(DocWriteResponse response) {
     public void failOnMappingUpdate(Exception cause) {
         assert assertInvariants(ItemProcessingState.WAIT_FOR_MAPPING_UPDATE);
         currentItemState = ItemProcessingState.EXECUTED;
-        final DocWriteRequest docWriteRequest = getCurrentItem().request();
+        final DocWriteRequest<?> docWriteRequest = getCurrentItem().request();
         executionResult = new BulkItemResponse(
             getCurrentItem().id(),
             docWriteRequest.opType(),
@@ -267,7 +257,7 @@ public void failOnMappingUpdate(Exception cause) {
     public void markOperationAsExecuted(Engine.Result result) {
         assertInvariants(ItemProcessingState.TRANSLATED);
         final BulkItemRequest current = getCurrentItem();
-        DocWriteRequest docWriteRequest = getRequestToExecute();
+        DocWriteRequest<?> docWriteRequest = getRequestToExecute();
         switch (result.getResultType()) {
             case SUCCESS:
                 final DocWriteResponse response;
@@ -347,24 +337,16 @@ public void markAsCompleted(BulkItemResponse translatedResponse) {
         if (translatedResponse.isFailed() == false && requestToExecute != null && requestToExecute != getCurrent()) {
             request.items()[currentIndex] = new BulkItemRequest(request.items()[currentIndex].id(), requestToExecute);
         }
-        getCurrentItem().setPrimaryResponse(translatedResponse);
+        primaryResponses[currentIndex] = translatedResponse;
         currentItemState = ItemProcessingState.COMPLETED;
+        assertInvariants(ItemProcessingState.COMPLETED);
         advance();
     }

     /** builds the bulk shard response to return to the user */
     public BulkShardResponse buildShardResponse(long serviceTimeEWMAInNanos, int nodeQueueSize) {
         assert hasMoreOperationsToExecute() == false;
-        return new BulkShardResponse(
-            request.shardId(),
-            Arrays.stream(request.items()).map(BulkItemRequest::getPrimaryResponse).toArray(BulkItemResponse[]::new),
-            serviceTimeEWMAInNanos,
-            nodeQueueSize
-        );
-    }
-
-    public BulkShardResponse buildShardResponse() {
-        return buildShardResponse(DEFAULT_SERVICE_TIME_IN_NANOS, DEFAULT_QUEUE_SIZE);
+        return new BulkShardResponse(request.shardId(), primaryResponses, serviceTimeEWMAInNanos, nodeQueueSize);
     }

     private boolean assertInvariants(ItemProcessingState... expectedCurrentState) {
@@ -379,7 +361,7 @@ private boolean assertInvariants(ItemProcessingState... expectedCurrentState) {
                 assert requestToExecute == null : requestToExecute;
                 assert executionResult == null : executionResult;
                 break;
-            case TRANSLATED:
+            case TRANSLATED, IMMEDIATE_RETRY:
                 assert requestToExecute != null;
                 assert executionResult == null : executionResult;
                 break;
@@ -387,18 +369,14 @@ private boolean assertInvariants(ItemProcessingState...
expectedCurrentState) { assert requestToExecute == null; assert executionResult == null : executionResult; break; - case IMMEDIATE_RETRY: - assert requestToExecute != null; - assert executionResult == null : executionResult; - break; case EXECUTED: // requestToExecute can be null if the update ended up as NOOP assert executionResult != null; break; case COMPLETED: - assert requestToExecute != null; + // requestToExecute can be null if the update ended up as NOOP assert executionResult != null; - assert getCurrentItem().getPrimaryResponse() != null; + assert primaryResponses[currentIndex] != null; break; } return true; diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkShardRequest.java b/server/src/main/java/org/opensearch/action/bulk/BulkShardRequest.java index 453d98ea6255f..d9a72f80042ed 100644 --- a/server/src/main/java/org/opensearch/action/bulk/BulkShardRequest.java +++ b/server/src/main/java/org/opensearch/action/bulk/BulkShardRequest.java @@ -68,6 +68,24 @@ public BulkShardRequest(ShardId shardId, RefreshPolicy refreshPolicy, BulkItemRe setRefreshPolicy(refreshPolicy); } + BulkShardRequest setPrimaryResponses(BulkItemResponse[] primaryResponses) { + if (primaryResponses == null || primaryResponses.length != items.length) { + throw new IllegalArgumentException("Primary responses must have same length as BulkItemRequests"); + } + BulkItemRequest[] newRequests = new BulkItemRequest[items.length]; + for (int i = 0; i < items.length; i++) { + BulkItemRequest request = items[i]; + if (request == null) { + newRequests[i] = null; + } else { + newRequests[i] = new BulkItemRequest(request.id(), request.request(), primaryResponses[i]); + } + } + BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, getRefreshPolicy(), newRequests); + cloneProperties(bulkShardRequest); + return bulkShardRequest; + } + public BulkItemRequest[] items() { return items; } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index 3f06354de9b5d..36c3ca13a21c3 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -860,9 +860,10 @@ public static Translog.Location performOnReplica(BulkShardRequest request, Index Translog.Location location = null; for (int i = 0; i < request.items().length; i++) { final BulkItemRequest item = request.items()[i]; - final BulkItemResponse response = item.getPrimaryResponse(); + final BulkItemResponse response = item.primaryResponse(); final Engine.Result operationResult; - if (item.getPrimaryResponse().isFailed()) { + assert response != null; + if (response.isFailed()) { if (response.getFailure().getSeqNo() == SequenceNumbers.UNASSIGNED_SEQ_NO) { continue; // ignore replication as we didn't generate a sequence number for this request. 
             }
@@ -886,7 +887,7 @@ public static Translog.Location performOnReplica(BulkShardRequest request, Index
                 assert response.getResponse().getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO;
                 operationResult = performOpOnReplica(response.getResponse(), item.request(), replica);
             }
-            assert operationResult != null : "operation result must never be null when primary response has no failure";
+            assert operationResult != null : "operation result must never be null";
             location = syncOperationResultOrThrow(operationResult, location);
         }
         return location;
diff --git a/server/src/main/java/org/opensearch/action/search/QueryPhaseResultConsumer.java b/server/src/main/java/org/opensearch/action/search/QueryPhaseResultConsumer.java
index b04d3086d8c95..f84089d6e77b1 100644
--- a/server/src/main/java/org/opensearch/action/search/QueryPhaseResultConsumer.java
+++ b/server/src/main/java/org/opensearch/action/search/QueryPhaseResultConsumer.java
@@ -194,7 +194,7 @@ public SearchPhaseController.ReducedQueryPhase reduce() throws Exception {
             aggReduceContextBuilder,
             performFinalReduce
         );
-        if (hasAggs) {
+        if (hasAggs && reducePhase.aggregations != null) {
             // Update the circuit breaker to replace the estimation with the serialized size of the newly reduced result
             long finalSize = reducePhase.aggregations.getSerializedSize() - breakerSize;
             pendingReduces.addWithoutBreaking(finalSize);
diff --git a/server/src/main/java/org/opensearch/action/support/replication/ReplicationRequest.java b/server/src/main/java/org/opensearch/action/support/replication/ReplicationRequest.java
index e7451e4f3e328..e8fb173cf1953 100644
--- a/server/src/main/java/org/opensearch/action/support/replication/ReplicationRequest.java
+++ b/server/src/main/java/org/opensearch/action/support/replication/ReplicationRequest.java
@@ -262,4 +262,11 @@ public String getDescription() {
     public void onRetry() {
         // nothing by default
     }
+
+    protected void cloneProperties(ReplicationRequest<?> target) {
+        target.waitForActiveShards(waitForActiveShards());
+        target.timeout(timeout());
+        target.routedBasedOnClusterVersion(routedBasedOnClusterVersion());
+        target.setParentTask(getParentTask());
+    }
 }
diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java
index 6e81243e3b9b6..8ebf3bf207bb1 100644
--- a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java
@@ -37,6 +37,7 @@ protected FeatureFlagSettings(
             FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING,
             FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING,
             FeatureFlags.TERM_VERSION_PRECOMMIT_ENABLE_SETTING,
-            FeatureFlags.STREAM_TRANSPORT_SETTING
+            FeatureFlags.STREAM_TRANSPORT_SETTING,
+            FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_SETTING
         );
     }
diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java
index 3494f5557d7b3..a3a49bb2fd9e9 100644
--- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java
@@ -303,6 +303,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
             // Setting for derived source feature
             IndexSettings.INDEX_DERIVED_SOURCE_SETTING,
             IndexSettings.INDEX_DERIVED_SOURCE_TRANSLOG_ENABLED_SETTING,
+            IndexSettings.PLUGGABLE_DATAFORMAT_ENABLED_SETTING,

             // Writable warm / tiering settings - always registered so nodes can parse
             // index metadata even when the feature flag is disabled
diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
index 904f3594b68de..2a53dc05f6559 100644
--- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
+++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
@@ -74,6 +74,17 @@ public class FeatureFlags {
         Property.NodeScope
     );

+    /**
+     * Gates the functionality of the pluggable data format feature.
+     */
+    public static final String PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG = FEATURE_FLAG_PREFIX + "pluggable.dataformat.enabled";
+
+    public static final Setting<Boolean> PLUGGABLE_DATAFORMAT_EXPERIMENTAL_SETTING = Setting.boolSetting(
+        PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG,
+        false,
+        Property.NodeScope
+    );
+
     public static final Setting<Boolean> CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING = Setting.boolSetting(
         CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG,
         false,
@@ -141,6 +152,7 @@ static class FeatureFlagsImpl {
             put(TERM_VERSION_PRECOMMIT_ENABLE_SETTING, TERM_VERSION_PRECOMMIT_ENABLE_SETTING.getDefault(Settings.EMPTY));
             put(STREAM_TRANSPORT_SETTING, STREAM_TRANSPORT_SETTING.getDefault(Settings.EMPTY));
             put(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING, CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING.getDefault(Settings.EMPTY));
+            put(PLUGGABLE_DATAFORMAT_EXPERIMENTAL_SETTING, PLUGGABLE_DATAFORMAT_EXPERIMENTAL_SETTING.getDefault(Settings.EMPTY));
         }
     };
diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java
index 7f0eec00b4a96..c5f5871dfa687 100644
--- a/server/src/main/java/org/opensearch/index/IndexModule.java
+++ b/server/src/main/java/org/opensearch/index/IndexModule.java
@@ -46,6 +46,7 @@
 import org.opensearch.cluster.routing.ShardRouting;
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.common.CheckedFunction;
+import org.opensearch.common.CheckedTriFunction;
 import org.opensearch.common.SetOnce;
 import org.opensearch.common.TriFunction;
 import org.opensearch.common.annotation.ExperimentalApi;
@@ -75,6 +76,7 @@
 import org.opensearch.index.engine.EngineConfigFactory;
 import org.opensearch.index.engine.EngineFactory;
 import org.opensearch.index.engine.dataformat.DataFormatRegistry;
+import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory;
 import org.opensearch.index.mapper.MapperService;
 import org.opensearch.index.shard.IndexEventListener;
 import org.opensearch.index.shard.IndexShard;
@@ -776,7 +778,79 @@ public IndexService newIndexService(
             segmentReplicationStatsProvider,
             clusterDefaultMaxMergeAtOnceSupplier,
             clusterMergeSchedulerConfig,
-            null
+            (DataFormatRegistry) null
         );
     }
+
+    /**
+     * @deprecated Use the overload that accepts a {@code dataFormatRegistry} parameter.
+     */
+    @Deprecated(forRemoval = true)
+    public IndexService newIndexService(
+        IndexService.IndexCreationContext indexCreationContext,
+        NodeEnvironment environment,
+        NamedXContentRegistry xContentRegistry,
+        IndexService.ShardStoreDeleter shardStoreDeleter,
+        CircuitBreakerService circuitBreakerService,
+        BigArrays bigArrays,
+        ThreadPool threadPool,
+        ScriptService scriptService,
+        ClusterService clusterService,
+        Client client,
+        IndicesQueryCache indicesQueryCache,
+        MapperRegistry mapperRegistry,
+        IndicesFieldDataCache indicesFieldDataCache,
+        NamedWriteableRegistry namedWriteableRegistry,
+        BooleanSupplier idFieldDataEnabled,
+        ValuesSourceRegistry valuesSourceRegistry,
+        IndexStorePlugin.DirectoryFactory remoteDirectoryFactory,
+        BiFunction<IndexSettings, ShardRouting, TranslogFactory> translogFactorySupplier,
+        Supplier<TimeValue> clusterDefaultRefreshIntervalSupplier,
+        Supplier<Boolean> fixedRefreshIntervalSchedulingEnabled,
+        Supplier<Boolean> shardLevelRefreshEnabled,
+        RecoverySettings recoverySettings,
+        RemoteStoreSettings remoteStoreSettings,
+        Consumer<IndexShard> replicator,
+        Function<ShardId, ReplicationStats> segmentReplicationStatsProvider,
+        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier,
+        ClusterMergeSchedulerConfig clusterMergeSchedulerConfig,
+        CheckedTriFunction<
+            ShardPath,
+            MapperService,
+            IndexSettings,
+            DataFormatAwareEngineFactory,
+            IOException> dataFormatAwareEngineFactorySupplier
+    ) throws IOException {
+
+        return newIndexService(
+            indexCreationContext,
+            environment,
+            xContentRegistry,
+            shardStoreDeleter,
+            circuitBreakerService,
+            bigArrays,
+            threadPool,
+            scriptService,
+            clusterService,
+            client,
+            indicesQueryCache,
+            mapperRegistry,
+            indicesFieldDataCache,
+            namedWriteableRegistry,
+            idFieldDataEnabled,
+            valuesSourceRegistry,
+            remoteDirectoryFactory,
+            translogFactorySupplier,
+            clusterDefaultRefreshIntervalSupplier,
+            fixedRefreshIntervalSchedulingEnabled,
+            shardLevelRefreshEnabled,
+            recoverySettings,
+            remoteStoreSettings,
+            replicator,
+            segmentReplicationStatsProvider,
+            clusterDefaultMaxMergeAtOnceSupplier,
+            clusterMergeSchedulerConfig,
+            (DataFormatRegistry) null
+        );
+    }
diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java
index 2dc33b3017572..66a1c29d6a9de 100644
--- a/server/src/main/java/org/opensearch/index/IndexSettings.java
+++ b/server/src/main/java/org/opensearch/index/IndexSettings.java
@@ -917,6 +917,13 @@ public static IndexMergePolicy fromString(String text) {
         Property.Dynamic
     );

+    public static final Setting<Boolean> PLUGGABLE_DATAFORMAT_ENABLED_SETTING = Setting.boolSetting(
+        "index.pluggable.dataformat.enabled",
+        false,
+        Property.IndexScope,
+        Property.Final
+    );
+
     private final Index index;
     private final Version version;
     private final Logger logger;
@@ -973,6 +980,7 @@ public static IndexMergePolicy fromString(String text) {
     private final boolean isTranslogMetadataEnabled;
     private volatile boolean allowDerivedField;
     private final boolean derivedSourceEnabled;
+    private final boolean pluggableDataFormatEnabled;
     private volatile boolean derivedSourceEnabledForTranslog;

     /**
@@ -1221,6 +1229,8 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
         checkPendingFlushEnabled = scopedSettings.get(INDEX_CHECK_PENDING_FLUSH_ENABLED);
         defaultSearchPipeline = scopedSettings.get(DEFAULT_SEARCH_PIPELINE);
         derivedSourceEnabled = scopedSettings.get(INDEX_DERIVED_SOURCE_SETTING);
+        pluggableDataFormatEnabled = FeatureFlags.isEnabled(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+            &&
scopedSettings.get(PLUGGABLE_DATAFORMAT_ENABLED_SETTING);
         derivedSourceEnabledForTranslog = scopedSettings.get(INDEX_DERIVED_SOURCE_TRANSLOG_ENABLED_SETTING);
         scopedSettings.addSettingsUpdateConsumer(INDEX_DERIVED_SOURCE_TRANSLOG_ENABLED_SETTING, this::setDerivedSourceEnabledForTranslog);
         /* There was an unintentional breaking change introduced with [OpenSearch-6424](https://github.com/opensearch-project/OpenSearch/pull/6424) (version 2.7).
@@ -2360,4 +2370,14 @@ public boolean isDerivedSourceEnabledForTranslog() {
     public boolean isDerivedSourceEnabled() {
         return derivedSourceEnabled;
     }
+
+    /**
+     * Returns whether the pluggable data format feature is enabled for this index.
+     * Requires both the experimental feature flag and the index-level setting.
+     *
+     * @return {@code true} if pluggable data format is enabled
+     */
+    public boolean isPluggableDataFormatEnabled() {
+        return pluggableDataFormatEnabled;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java b/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java
index ab8aeef204013..f5aac19f9290b 100644
--- a/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java
@@ -50,6 +50,7 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.function.BiFunction;
+import java.util.function.Supplier;

 import static org.opensearch.action.index.IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP;
 import static org.opensearch.index.translog.Translog.EMPTY_TRANSLOG_LOCATION;
@@ -62,7 +63,7 @@ public class IngestionEngine extends InternalEngine {

     private StreamPoller streamPoller;
     private final IngestionConsumerFactory ingestionConsumerFactory;
-    private final DocumentMapperForType documentMapperForType;
+    private final Supplier<DocumentMapperForType> documentMapperForTypeSupplier;
     private final IngestPipelineExecutor pipelineExecutor;
     private volatile IngestionShardPointer lastCommittedBatchStartPointer;
@@ -74,7 +75,7 @@ public IngestionEngine(EngineConfig engineConfig, IngestionConsumerFactory inges
             engineConfig.getIndexSettings().getIndex().getName(),
             engineConfig.getIndexSettings()
         );
-        this.documentMapperForType = engineConfig.getDocumentMapperForTypeSupplier().get();
+        this.documentMapperForTypeSupplier = engineConfig.getDocumentMapperForTypeSupplier();
         registerDynamicIndexSettingsHandlers();
     }
@@ -499,7 +500,7 @@ private void updateWarmupConfig(TimeValue timeout, Long lagThreshold) {
     }

     public DocumentMapperForType getDocumentMapperForType() {
-        return documentMapperForType;
+        return documentMapperForTypeSupplier.get();
     }

     @Override
diff --git a/server/src/main/java/org/opensearch/index/mapper/AbstractGeometryFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/AbstractGeometryFieldMapper.java
index b19ab9590ebe6..bb6c7b47a662d 100644
--- a/server/src/main/java/org/opensearch/index/mapper/AbstractGeometryFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/AbstractGeometryFieldMapper.java
@@ -382,6 +382,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
         throw new UnsupportedOperationException("Parsing is implemented in parse(), this method should NEVER be called");
     }

+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        throw new UnsupportedOperationException("Parsing is implemented in parse(), this method should NEVER be called");
+    }
+
     protected abstract void addStoredFields(ParseContext context, Processed geometry);
     protected abstract void addDocValuesFields(String name, Processed geometry, List<IndexableField> fields, ParseContext context);
diff --git a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java
index 040491f775357..65c647693688b 100644
--- a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java
@@ -194,14 +194,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
         if (stored == false && hasDocValues == false) {
             return;
         }
-        byte[] value = context.parseExternalValue(byte[].class);
-        if (value == null) {
-            if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) {
-                return;
-            } else {
-                value = context.parser().binaryValue();
-            }
-        }
+        byte[] value = parseBinaryValue(context);
         if (value == null) {
             return;
         }
@@ -225,6 +218,27 @@ protected void parseCreateField(ParseContext context) throws IOException {
         }
     }

+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        byte[] value = parseBinaryValue(context);
+        if (value == null) {
+            return;
+        }
+        context.documentInput().addField(fieldType(), value);
+    }
+
+    private byte[] parseBinaryValue(ParseContext context) throws IOException {
+        byte[] value = context.parseExternalValue(byte[].class);
+        if (value == null) {
+            if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) {
+                return null;
+            } else {
+                value = context.parser().binaryValue();
+            }
+        }
+        return value;
+    }
+
     @Override
     public ParametrizedFieldMapper.Builder getMergeBuilder() {
         return new BinaryFieldMapper.Builder(simpleName()).init(this);
diff --git a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java
index f3a027e3375e9..9ce7eaf751980 100644
--- a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java
@@ -374,18 +374,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
             return;
         }

-        Boolean value = context.parseExternalValue(Boolean.class);
-        if (value == null) {
-            XContentParser.Token token = context.parser().currentToken();
-            if (token == XContentParser.Token.VALUE_NULL) {
-                if (nullValue != null) {
-                    value = nullValue;
-                }
-            } else {
-                value = context.parser().booleanValue();
-            }
-        }
-
+        Boolean value = parseBooleanValue(context);
         if (value == null) {
             return;
         }
@@ -402,6 +391,30 @@ protected void parseCreateField(ParseContext context) throws IOException {
         }
     }

+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        Boolean value = parseBooleanValue(context);
+        if (value == null) {
+            return;
+        }
+        context.documentInput().addField(fieldType(), value);
+    }
+
+    private Boolean parseBooleanValue(ParseContext context) throws IOException {
+        Boolean value = context.parseExternalValue(Boolean.class);
+        if (value == null) {
+            XContentParser.Token token = context.parser().currentToken();
+            if (token == XContentParser.Token.VALUE_NULL) {
+                if (nullValue != null) {
+                    value = nullValue;
+                }
+            } else {
+                value = context.parser().booleanValue();
+            }
+        }
+        return value;
+    }
+
     @Override
     public ParametrizedFieldMapper.Builder getMergeBuilder() {
         return new Builder(simpleName()).init(this);
diff --git
a/server/src/main/java/org/opensearch/index/mapper/CompletionFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/CompletionFieldMapper.java index 246aa2e803954..c6931caac758d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/CompletionFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/CompletionFieldMapper.java @@ -652,6 +652,11 @@ protected void parseCreateField(ParseContext context) throws IOException { // no-op } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + // no-op + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java index 84557563d8cd3..7664e855b0254 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java @@ -252,7 +252,15 @@ public ParametrizedFieldMapper.Builder getMergeBuilder() { @Override protected void parseCreateField(ParseContext context) throws IOException { + validateConstantValue(context); + } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + validateConstantValue(context); + } + + private void validateConstantValue(ParseContext context) throws IOException { final String value; if (context.externalValueSet()) { value = context.externalValue().toString(); @@ -266,7 +274,6 @@ protected void parseCreateField(ParseContext context) throws IOException { if (!value.equals(fieldType().value)) { throw new IllegalArgumentException("constant keyword field [" + name() + "] must have a value of [" + this.value + "]"); } - } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapper.java index 458fe10acf735..bac85f932cdd8 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapper.java @@ -175,6 +175,11 @@ protected void parseCreateField(ParseContext context) throws IOException { throw new MapperParsingException("context_aware_grouping cannot be ingested in the document"); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + throw new MapperParsingException("context_aware_grouping cannot be ingested in the document"); + } + public ContextAwareGroupingFieldType fieldType() { return (ContextAwareGroupingFieldType) mappedFieldType; } diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java index 25f22838b7bb8..dc769030adcdf 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java @@ -814,24 +814,9 @@ protected DateFieldMapper clone() { @Override protected void parseCreateField(ParseContext context) throws IOException { - String dateAsString = getFieldValue(context); - long timestamp; - if (dateAsString == null) { - if (nullValue == null) { - return; - } - timestamp = nullValue; - } else { - try { - timestamp = fieldType().parse(dateAsString); - } catch (IllegalArgumentException | 
OpenSearchParseException | DateTimeException | ArithmeticException e) {
-                if (ignoreMalformed().value()) {
-                    context.addIgnoredField(mappedFieldType.name());
-                    return;
-                } else {
-                    throw e;
-                }
-            }
+        Long timestamp = parseTimestamp(context);
+        if (timestamp == null) {
+            return;
         }

         if (indexed) {
@@ -851,6 +836,38 @@ protected void parseCreateField(ParseContext context) throws IOException {
         }
     }

+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        Long timestamp = parseTimestamp(context);
+        if (timestamp == null) {
+            return;
+        }
+        context.documentInput().addField(fieldType(), timestamp);
+    }
+
+    private Long parseTimestamp(ParseContext context) throws IOException {
+        String dateAsString = getFieldValue(context);
+        long timestamp;
+        if (dateAsString == null) {
+            if (nullValue == null) {
+                return null;
+            }
+            timestamp = nullValue;
+        } else {
+            try {
+                timestamp = fieldType().parse(dateAsString);
+            } catch (IllegalArgumentException | OpenSearchParseException | DateTimeException | ArithmeticException e) {
+                if (ignoreMalformed().value()) {
+                    context.addIgnoredField(mappedFieldType.name());
+                    return null;
+                } else {
+                    throw e;
+                }
+            }
+        }
+        return timestamp;
+    }
+
     boolean isSkiplistDefaultEnabled(IndexSortConfig indexSortConfig, String fieldName) {
         if (this.indexCreatedVersion.onOrAfter(Version.V_3_3_0)) {
             if (!isSkiplistConfigured) {
diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java
index d89cd6ace87b1..beb73c0de9204 100644
--- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java
@@ -238,6 +238,13 @@ protected void parseCreateField(ParseContext context) throws IOException {
         throw new UnsupportedOperationException("should not be invoked");
     }

+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        // Parsing is handled via the Builder when the root object is parsed, so this method should never be invoked;
+        // the context would not contain anything here since the DerivedFieldMapper is neither indexed nor stored.
+ throw new UnsupportedOperationException("should not be invoked"); + } + @Override public ParametrizedFieldMapper.Builder getMergeBuilder() { return new Builder(simpleName(), this.indexAnalyzers, defaultDateFormatter, defaultIgnoreMalformed).init(this); diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java index cb7e08f062d6d..9a528544ca441 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java @@ -51,6 +51,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexSortConfig; import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.MetadataFieldMapper.TypeParser; import org.opensearch.index.query.NestedQueryBuilder; @@ -253,6 +254,10 @@ public ParsedDocument parse(SourceToParse source) throws MapperParsingException return documentParser.parseDocument(source, mapping.metadataMappers); } + public ParsedDocument parse(SourceToParse source, DocumentInput documentInput) throws MapperParsingException { + return documentParser.parseDocument(source, mapping.metadataMappers, documentInput); + } + public ParsedDocument createDeleteTombstoneDoc(String index, String id) throws MapperParsingException { final SourceToParse emptySource = new SourceToParse(index, id, new BytesArray("{}"), MediaTypeRegistry.JSON); return documentParser.parseDocument(emptySource, deleteTombstoneMetadataFieldMappers).toTombstone(); diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java index ba1bf1d189c7f..02ff66b51af7b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java @@ -49,6 +49,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.DynamicTemplate.XContentFieldType; import org.opensearch.script.ContextAwareGroupingScript; @@ -82,6 +83,11 @@ final class DocumentParser { } ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadataFieldsMappers) throws MapperParsingException { + return parseDocument(source, metadataFieldsMappers, null); + } + + ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadataFieldsMappers, DocumentInput documentInput) + throws MapperParsingException { final Mapping mapping = docMapper.mapping(); final ParseContext.InternalParseContext context; final MediaType mediaType = source.getMediaType(); @@ -94,7 +100,7 @@ ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadat mediaType ) ) { - context = new ParseContext.InternalParseContext(indexSettings, docMapperParser, docMapper, source, parser); + context = new ParseContext.InternalParseContext(indexSettings, docMapperParser, docMapper, source, parser, documentInput); validateStart(parser); internalParseDocument(mapping, metadataFieldsMappers, context, parser); validateEnd(parser); @@ -108,7 +114,7 @@ ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadat context.postParse(); - return 
parsedDocument(source, context, createDynamicUpdate(mapping, docMapper, context.getDynamicMappers()));
+        return parsedDocument(source, context, createDynamicUpdate(mapping, docMapper, context.getDynamicMappers()), documentInput);
     }

     private static boolean containsDisabledObjectMapper(ObjectMapper objectMapper, String[] subfields) {
@@ -182,7 +188,12 @@ private static boolean isEmptyDoc(Mapping mapping, XContentParser parser) throws
         return false;
     }

-    private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.InternalParseContext context, Mapping update) {
+    private static ParsedDocument parsedDocument(
+        SourceToParse source,
+        ParseContext.InternalParseContext context,
+        Mapping update,
+        DocumentInput documentInput
+    ) {
         return new ParsedDocument(
             context.version(),
             context.seqID(),
@@ -191,7 +202,8 @@ private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.
             context.docs(),
             context.sourceToParse().source(),
             context.sourceToParse().getMediaType(),
-            update
+            update,
+            documentInput
         );
     }
diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java
index 5ba68e6d7cb80..850605597394b 100644
--- a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java
@@ -37,6 +37,7 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
 import org.opensearch.common.Explicit;
+import org.opensearch.common.annotation.ExperimentalApi;
 import org.opensearch.common.annotation.PublicApi;
 import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Setting.Property;
@@ -283,7 +284,11 @@ private boolean shouldIgnoreMalformed(IndexSettings is) {
      */
     public void parse(ParseContext context) throws IOException {
         try {
-            parseCreateField(context);
+            if (isPluggableDataFormatFeatureEnabled(context)) {
+                parseCreateFieldForPluggableFormat(context);
+            } else {
+                parseCreateField(context);
+            }
             extractGroupingCriteriaParams(context);

         } catch (Exception e) {
@@ -332,6 +337,22 @@
      */
     protected abstract void parseCreateField(ParseContext context) throws IOException;

+    /**
+     * Parse the field value and populate the pluggable data format's {@link ParseContext#documentInput()}.
+     * <p>
+     * Subclasses that support pluggable data formats should override this method to extract the
+     * parsed value and call {@code context.documentInput().addField(fieldType(), value)}.
+     * The default implementation throws {@link UnsupportedOperationException}.
+     *
+     * @param context the parse context carrying the document input
+     * @throws IOException if an I/O error occurs while parsing
+     * @throws UnsupportedOperationException if the mapper does not support pluggable data formats
+     */
+    @ExperimentalApi
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        throw new UnsupportedOperationException("Field mapper [" + typeName() + "] does not support pluggable data formats");
+    }
+
     private void extractGroupingCriteriaParams(ParseContext context) throws IOException {
         if (context.docMapper() != null && context.docMapper().mappers() != null) {
             final Mapper mapper = context.docMapper().mappers().getMapper(ContextAwareGroupingFieldMapper.CONTENT_TYPE);
@@ -366,11 +387,19 @@ protected final void createFieldNamesField(ParseContext context) {
         FieldNamesFieldType fieldNamesFieldType = context.docMapper().metadataMapper(FieldNamesFieldMapper.class).fieldType();
         if (fieldNamesFieldType != null && fieldNamesFieldType.isEnabled()) {
             for (String fieldName : FieldNamesFieldMapper.extractFieldNames(fieldType().name())) {
-                context.doc().add(new Field(FieldNamesFieldMapper.NAME, fieldName, FieldNamesFieldMapper.Defaults.FIELD_TYPE));
+                if (isPluggableDataFormatFeatureEnabled(context)) {
+                    context.documentInput().addField(fieldNamesFieldType, fieldName);
+                } else {
+                    context.doc().add(new Field(FieldNamesFieldMapper.NAME, fieldName, FieldNamesFieldMapper.Defaults.FIELD_TYPE));
+                }
             }
         }
     }

+    protected final boolean isPluggableDataFormatFeatureEnabled(ParseContext parseContext) {
+        return parseContext.indexSettings().isPluggableDataFormatEnabled();
+    }
+
     @Override
     public Iterator<Mapper> iterator() {
         return multiFields.iterator();
diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java
index 7b1b2615f996d..319a0677610e9 100644
--- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java
@@ -563,34 +563,50 @@ public FlatObjectFieldType fieldType() {

     @Override
     protected void parseCreateField(ParseContext context) throws IOException {
+        HashSet<String> pathParts = parseObjectPathParts(context);
+        if (pathParts != null) {
+            createPathFields(context, pathParts);
+        }
+    }
+
+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
+        HashSet<String> pathParts = parseObjectPathParts(context);
+        if (pathParts != null) {
+            createPathFieldsForPluggableFormat(context, pathParts);
+        }
+    }
+
+    /**
+     * Parses the flat_object field value and returns the collected path parts,
+     * or {@code null} if the field should be skipped (null value or not searchable/stored/docvalues).
+     */
+    private HashSet<String> parseObjectPathParts(ParseContext context) throws IOException {
         XContentParser ctxParser = context.parser();
         if (fieldType().isSearchable() == false && fieldType().isStored() == false && fieldType().hasDocValues() == false) {
             ctxParser.skipChildren();
-            return;
+            return null;
         }
-        if (ctxParser.currentToken() != XContentParser.Token.VALUE_NULL) {
-            if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) {
-                throw new ParsingException(
-                    ctxParser.getTokenLocation(),
-                    "[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value"
-                );
-            }
-            parseObject(ctxParser, context);
+        if (ctxParser.currentToken() == XContentParser.Token.VALUE_NULL) {
+            return null;
+        }
+        if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) {
+            throw new ParsingException(
+                ctxParser.getTokenLocation(),
+                "[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value"
+            );
         }
-    }

-    private void parseObject(XContentParser parser, ParseContext context) throws IOException {
-        assert parser.currentToken() == XContentParser.Token.START_OBJECT;
-        parser.nextToken(); // Skip the outer START_OBJECT. Need to return on END_OBJECT.
+        assert ctxParser.currentToken() == XContentParser.Token.START_OBJECT;
+        ctxParser.nextToken();

         LinkedList<String> path = new LinkedList<>(Collections.singleton(fieldType().name()));
         HashSet<String> pathParts = new HashSet<>();
-        while (parser.currentToken() != XContentParser.Token.END_OBJECT) {
-            parseToken(parser, context, path, pathParts);
+        while (ctxParser.currentToken() != XContentParser.Token.END_OBJECT) {
+            parseToken(ctxParser, context, path, pathParts);
         }
-
-        createPathFields(context, pathParts);
+        return pathParts;
     }

     private void createPathFields(ParseContext context, HashSet<String> pathParts) {
@@ -607,6 +623,13 @@ private void createPathFields(ParseContext context, HashSet<String> pathParts) {
         }
     }

+    private void createPathFieldsForPluggableFormat(ParseContext context, HashSet<String> pathParts) {
+        for (String part : pathParts) {
+            final BytesRef value = new BytesRef(name() + DOT_SYMBOL + part);
+            context.documentInput().addField(fieldType(), value);
+        }
+    }
+
     private static String getDVPrefix(String rootFieldName) {
         return rootFieldName + DOT_SYMBOL;
     }
diff --git a/server/src/main/java/org/opensearch/index/mapper/HllFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/HllFieldMapper.java
index cb8841e2bdf0f..dfc210cfd19f2 100644
--- a/server/src/main/java/org/opensearch/index/mapper/HllFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/HllFieldMapper.java
@@ -184,26 +184,38 @@ private HllFieldMapper(String simpleName, MappedFieldType mappedFieldType, Multi

     @Override
     protected void parseCreateField(ParseContext context) throws IOException {
-        // Parse binary HLL++ sketch data
-        byte[] value = context.parseExternalValue(byte[].class);
+        byte[] value = parseHllValue(context);
         if (value == null) {
-            if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) {
-                return;
-            } else {
-                value = context.parser().binaryValue();
-            }
+            return;
         }
+        BytesRef sketchBytes = new BytesRef(value);
+
validateSketchData(sketchBytes); + context.documentInput().addField(fieldType(), value); + } - // Store as binary doc value - context.doc().add(new BinaryDocValuesField(fieldType().name(), sketchBytes)); + private byte[] parseHllValue(ParseContext context) throws IOException { + byte[] value = context.parseExternalValue(byte[].class); + if (value == null) { + if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) { + return null; + } else { + value = context.parser().binaryValue(); + } + } + return value; } /** diff --git a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java index 992007329a135..144cee9f6034b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java @@ -653,6 +653,15 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final InetAddress address = getFieldValue(context); + if (address == null) { + return; + } + context.documentInput().addField(fieldType(), address); + } + @Override protected InetAddress getFieldValue(ParseContext context) throws IOException { Object addressAsObject; diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 3271f60a466ee..a1ed5f8547a96 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -860,6 +860,37 @@ public KeywordFieldType fieldType() { @Override protected void parseCreateField(ParseContext context) throws IOException { + String value = parseKeywordValue(context); + if (value == null) { + return; + } + + // convert to utf8 only once before feeding postings/dv/stored fields + final BytesRef binaryValue = new BytesRef(value); + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new KeywordField(fieldType().name(), binaryValue, fieldType); + context.doc().add(field); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + } + + if (fieldType().hasDocValues()) { + context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } + } + + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + String value = parseKeywordValue(context); + if (value == null) { + return; + } + context.documentInput().addField(fieldType(), value); + } + + private String parseKeywordValue(ParseContext context) throws IOException { String value; if (context.externalValueSet()) { value = context.externalValue().toString(); @@ -873,28 +904,14 @@ protected void parseCreateField(ParseContext context) throws IOException { } if (value == null || value.length() > ignoreAbove) { - return; + return null; } NamedAnalyzer normalizer = fieldType().normalizer(); if (normalizer != null) { value = normalizeValue(normalizer, name(), value); } - - // convert to utf8 only once before feeding postings/dv/stored fields - final BytesRef binaryValue = new BytesRef(value); - if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - Field field = new KeywordField(fieldType().name(), binaryValue, fieldType); - context.doc().add(field); - - if 
(fieldType().hasDocValues() == false && fieldType.omitNorms()) { - createFieldNamesField(context); - } - } - - if (fieldType().hasDocValues()) { - context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); - } + return value; } static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException { diff --git a/server/src/main/java/org/opensearch/index/mapper/Mapper.java b/server/src/main/java/org/opensearch/index/mapper/Mapper.java index 3b9024162656f..939f7de12e912 100644 --- a/server/src/main/java/org/opensearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/Mapper.java @@ -39,6 +39,7 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.analysis.IndexAnalyzers; @@ -52,6 +53,8 @@ import java.util.function.Function; import java.util.function.Supplier; +import static org.opensearch.index.IndexSettings.PLUGGABLE_DATAFORMAT_ENABLED_SETTING; + /** * The foundation OpenSearch mapper * @@ -303,6 +306,18 @@ protected static boolean hasIndexCreated(Settings settings) { return settings.hasValue(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey()); } + /** + * Checks if the optimised index feature is enabled for the given settings. + * Requires both the {@link FeatureFlags#PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG} feature flag and the {@code PLUGGABLE_DATAFORMAT_ENABLED_SETTING} index setting to be enabled. + * + * @param settings the index settings to check + * @return {@code true} if the pluggable dataformat feature flag and the optimised index setting are both enabled + */ + public static boolean isPluggableDataFormatEnabled(Settings settings) { + return FeatureFlags.isEnabled(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + && PLUGGABLE_DATAFORMAT_ENABLED_SETTING.get(settings); + } + /** * Method to determine, if it is possible to derive source for this field using field mapping parameters */
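To make the gating concrete: the JVM-wide experimental feature flag and the per-index setting must both be enabled, and FieldMapper's isPluggableDataFormatFeatureEnabled(ParseContext) helper above exposes the combined check during parsing. The dispatch site is not part of this excerpt, so the following is an illustrative sketch only of how the two parse paths are presumably selected:

    // Hypothetical dispatch: with both gates open, values flow into DocumentInput;
    // otherwise the stock Lucene indexing path is used.
    void createField(ParseContext context) throws IOException {
        if (isPluggableDataFormatFeatureEnabled(context)) {
            parseCreateFieldForPluggableFormat(context); // raw values into documentInput()
        } else {
            parseCreateField(context);                   // Lucene fields into context.doc()
        }
    }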
diff --git a/server/src/main/java/org/opensearch/index/mapper/MetadataFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MetadataFieldMapper.java index c98d0c8242078..5a991bb75bd02 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MetadataFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MetadataFieldMapper.java @@ -205,6 +205,13 @@ protected void parseCreateField(ParseContext context) throws IOException { ); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + throw new MapperParsingException( + "Field [" + name() + "] is a metadata field and cannot be added inside" + " a document. Use the index API request parameters." + ); + } + /** * Called before {@link FieldMapper#parse(ParseContext)} on the {@link RootObjectMapper}. */ diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 3a7f942ef4525..ecf3e536896c8 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -2174,6 +2174,15 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + Number numericValue = getFieldValue(context); + if (numericValue == null) { + return; + } + context.documentInput().addField(fieldType(), numericValue); + } + @Override protected Number getFieldValue(ParseContext context) throws IOException { XContentParser parser = context.parser(); diff --git a/server/src/main/java/org/opensearch/index/mapper/ParseContext.java b/server/src/main/java/org/opensearch/index/mapper/ParseContext.java index aaad2d1922d7a..d37c147c90e06 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ParseContext.java +++ b/server/src/main/java/org/opensearch/index/mapper/ParseContext.java @@ -36,9 +36,11 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; import org.opensearch.OpenSearchParseException; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.dataformat.DocumentInput; import java.util.ArrayList; import java.util.Collection; @@ -263,6 +265,11 @@ public Document doc() { return in.doc(); } + @Override + public DocumentInput documentInput() { + return in.documentInput(); + } + @Override protected void addDoc(Document doc) { in.addDoc(doc); @@ -414,12 +421,25 @@ public static class InternalParseContext extends ParseContext { private final Set<String> ignoredFields = new HashSet<>(); + private DocumentInput documentInput; + public InternalParseContext( IndexSettings indexSettings, DocumentMapperParser docMapperParser, DocumentMapper docMapper, SourceToParse source, XContentParser parser + ) { + this(indexSettings, docMapperParser, docMapper, source, parser, null); + } + + public InternalParseContext( + IndexSettings indexSettings, + DocumentMapperParser docMapperParser, + DocumentMapper docMapper, + SourceToParse source, + XContentParser parser, + DocumentInput documentInput ) { this.indexSettings = indexSettings; this.docMapper = docMapper; @@ -427,6 +447,7 @@ public InternalParseContext( this.path = new ContentPath(0); this.parser = parser; this.document = new Document(); + this.documentInput = documentInput; this.documents = new ArrayList<>(); this.documents.add(document); this.version = null; @@ -479,6 +500,11 @@ public Document doc() { return this.document; } + @Override + public DocumentInput documentInput() { + return this.documentInput; + } + @Override protected void addDoc(Document doc) { numNestedDocs++; @@ -749,6 +775,9 @@ public boolean isWithinMultiFields() { public abstract Document doc(); + @ExperimentalApi + public abstract DocumentInput documentInput(); + protected abstract void addDoc(Document doc); public abstract RootObjectMapper root(); diff --git a/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java b/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java index 16e38980f8600..a29e88a51f714 100644 ---
a/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java +++ b/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java @@ -37,6 +37,7 @@ import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaType; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.ParseContext.Document; @@ -63,6 +64,7 @@ public class ParsedDocument { private MediaType mediaType; private Mapping dynamicMappingsUpdate; + private DocumentInput documentInput; public ParsedDocument( Field version, @@ -73,6 +75,20 @@ public ParsedDocument( BytesReference source, MediaType mediaType, Mapping dynamicMappingsUpdate + ) { + this(version, seqID, id, routing, documents, source, mediaType, dynamicMappingsUpdate, null); + } + + public ParsedDocument( + Field version, + SeqNoFieldMapper.SequenceIDFields seqID, + String id, + String routing, + List documents, + BytesReference source, + MediaType mediaType, + Mapping dynamicMappingsUpdate, + DocumentInput documentInput ) { this.version = version; this.seqID = seqID; @@ -82,6 +98,11 @@ public ParsedDocument( this.source = source; this.dynamicMappingsUpdate = dynamicMappingsUpdate; this.mediaType = mediaType; + this.documentInput = documentInput; + } + + public DocumentInput getDocumentInput() { + return documentInput; } public String id() { diff --git a/server/src/main/java/org/opensearch/index/mapper/RangeFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/RangeFieldMapper.java index 71e0353b32db7..e02ba2b1bef6c 100644 --- a/server/src/main/java/org/opensearch/index/mapper/RangeFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/RangeFieldMapper.java @@ -481,6 +481,15 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + Range range = parseRange(context); + if (range == null) { + return; + } + context.documentInput().addField(fieldType(), range); + } + private Range parseRange(ParseContext context) throws IOException { if (context.externalValueSet()) { return context.parseExternalValue(Range.class); diff --git a/server/src/main/java/org/opensearch/index/mapper/SemanticVersionFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/SemanticVersionFieldMapper.java index 3e3222ff5df75..b99bd05345ce1 100644 --- a/server/src/main/java/org/opensearch/index/mapper/SemanticVersionFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/SemanticVersionFieldMapper.java @@ -381,6 +381,15 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + String value = context.parser().textOrNull(); + if (value == null) { + return; + } + context.documentInput().addField(fieldType(), value); + } + @Override public ParametrizedFieldMapper.Builder getMergeBuilder() { Builder builder = new Builder(name()); diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index d1217e98749ed..543c89ec8649a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -569,6 +569,17 
@@ protected void parseCreateField(ParseContext context) { ); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) { + throw new MapperParsingException( + String.format( + Locale.ROOT, + "Field [%s] is a star tree field and cannot be added inside a document. Use the index API request parameters.", + name() + ) + ); + } + /** * Star tree mapped field type containing dimensions, metrics, star tree specs * diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index 50b30e65e315a..1e7deb1dca4ed 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -709,6 +709,11 @@ protected void parseCreateField(ParseContext context) { throw new UnsupportedOperationException(); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + throw new UnsupportedOperationException(); + } + @Override protected void mergeOptions(FieldMapper other, List conflicts) { @@ -740,6 +745,11 @@ protected void parseCreateField(ParseContext context) { throw new UnsupportedOperationException(); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) { + throw new UnsupportedOperationException(); + } + @Override protected void mergeOptions(FieldMapper other, List conflicts) { @@ -1055,6 +1065,15 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + final String value = getFieldValue(context); + if (value == null) { + return; + } + context.documentInput().addField(fieldType(), value); + } + @Override protected String getFieldValue(ParseContext context) throws IOException { if (context.externalValueSet()) { diff --git a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java index 2fd08ccfbd823..f29d8263a063a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java @@ -193,18 +193,7 @@ public int ignoreAbove() { @Override protected void parseCreateField(ParseContext context) throws IOException { - String value; - if (context.externalValueSet()) { - value = context.externalValue().toString(); - } else { - XContentParser parser = context.parser(); - if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { - value = nullValue; - } else { - value = parser.textOrNull(); - } - } - + String value = parseWildcardValue(context); if (value == null || value.length() > ignoreAbove) { return; } @@ -228,6 +217,26 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + String value = parseWildcardValue(context); + if (value == null || value.length() > ignoreAbove) { + return; + } + context.documentInput().addField(fieldType(), value); + } + + private String parseWildcardValue(ParseContext context) throws IOException { + if (context.externalValueSet()) { + return context.externalValue().toString(); + } + XContentParser parser = context.parser(); + if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { + return nullValue; + } + return 
parser.textOrNull(); + } + /** * Tokenizer to emit tokens to support wildcard first-phase matching. *

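The IndexShard change below stops allocating a fresh NonClosingReaderWrapper on every query/get/update by caching one wrapper per DirectoryReader. Reduced to its essentials, the pattern is computeIfAbsent plus a reader-close listener for eviction (simplified sketch; the production code registers the listener outside computeIfAbsent and additionally evicts the entry if wrapper creation fails):

    private final ConcurrentHashMap<DirectoryReader, DirectoryReader> cache = new ConcurrentHashMap<>();

    DirectoryReader sharedNonClosingWrapper(DirectoryReader reader) {
        return cache.computeIfAbsent(reader, key -> {
            try {
                DirectoryReader wrapper = new NonClosingReaderWrapper(key);
                // evict when the underlying reader closes so the cache never
                // holds wrappers for dead readers
                OpenSearchDirectoryReader.addReaderCloseListener(key, cacheKey -> cache.remove(key));
                return wrapper;
            } catch (IOException e) {
                throw new OpenSearchException("failed to wrap searcher", e);
            }
        });
    }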
diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index fa66e8405120a..8080b3ef22e05 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -243,6 +243,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletionService; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorCompletionService; @@ -324,6 +325,8 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private final IndexingOperationListener indexingOperationListeners; private final Runnable globalCheckpointSyncer; + private final ConcurrentHashMap<DirectoryReader, NonClosingReaderWrapper> nonClosingReaderWrapperCache = new ConcurrentHashMap<>(); + private final Function<DirectoryReader, DirectoryReader> nonClosingReaderWrapperSupplier; Runnable getGlobalCheckpointSyncer() { return globalCheckpointSyncer; @@ -545,6 +548,34 @@ public boolean shouldCache(Query query) { } else { readerWrapper = indexReaderWrapper; } + + nonClosingReaderWrapperSupplier = directoryReader -> { + int[] fromCache = new int[] { 0 }; // 0 = served from cache, 1 = newly created, 2 = creation failed + try { + // To prevent instantiating a new NonClosingReaderWrapper per query/get/update request, + // the wrapper is cached and shared across all uses of the same DirectoryReader. + return nonClosingReaderWrapperCache.computeIfAbsent(directoryReader, key -> { + try { + NonClosingReaderWrapper nonClosingReaderWrapper = new NonClosingReaderWrapper(key); + fromCache[0] = 1; + return nonClosingReaderWrapper; + } catch (IOException e) { + fromCache[0] = 2; + throw new OpenSearchException("failed to wrap searcher", e); + } + }); + } finally { + if (fromCache[0] == 1) { + OpenSearchDirectoryReader.addReaderCloseListener( + directoryReader, + cacheKey -> nonClosingReaderWrapperCache.remove(directoryReader) + ); + } else if (fromCache[0] == 2) { + nonClosingReaderWrapperCache.remove(directoryReader); + } + } + }; + refreshListeners = buildRefreshListeners(); lastSearcherAccess.set(threadPool.relativeTimeInMillis()); persistMetadata(path, indexSettings, shardRouting, null, logger); @@ -2253,7 +2284,9 @@ private Engine.Searcher wrapSearcher(Engine.Searcher searcher) { : "DirectoryReader must be an instance or OpenSearchDirectoryReader"; boolean success = false; try { - final Engine.Searcher newSearcher = readerWrapper == null ? searcher : wrapSearcher(searcher, readerWrapper); + final Engine.Searcher newSearcher = readerWrapper == null + ?
searcher + : wrapSearcher(searcher, readerWrapper, nonClosingReaderWrapperSupplier); assert newSearcher != null; success = true; return newSearcher; @@ -2272,15 +2305,29 @@ private Engine.Searcher wrapSearcher(Engine.Searcher searcher) { public static Engine.Searcher wrapSearcher( Engine.Searcher engineSearcher, CheckedFunction<DirectoryReader, DirectoryReader, IOException> readerWrapper + ) throws IOException { + return wrapSearcher(engineSearcher, readerWrapper, null); + } + + public static Engine.Searcher wrapSearcher( + Engine.Searcher engineSearcher, + CheckedFunction<DirectoryReader, DirectoryReader, IOException> readerWrapper, + Function<DirectoryReader, DirectoryReader> nonClosingReaderWrapperSupplier ) throws IOException { assert readerWrapper != null; - final OpenSearchDirectoryReader openSearchDirectoryReader = OpenSearchDirectoryReader.getOpenSearchDirectoryReader( - engineSearcher.getDirectoryReader() - ); + DirectoryReader directoryReader = engineSearcher.getDirectoryReader(); + final OpenSearchDirectoryReader openSearchDirectoryReader = OpenSearchDirectoryReader.getOpenSearchDirectoryReader(directoryReader); if (openSearchDirectoryReader == null) { throw new IllegalStateException("Can't wrap non opensearch directory reader"); } - NonClosingReaderWrapper nonClosingReaderWrapper = new NonClosingReaderWrapper(engineSearcher.getDirectoryReader()); + + DirectoryReader nonClosingReaderWrapper; + if (nonClosingReaderWrapperSupplier == null) { + nonClosingReaderWrapper = new NonClosingReaderWrapper(directoryReader); + } else { + nonClosingReaderWrapper = nonClosingReaderWrapperSupplier.apply(directoryReader); + assert nonClosingReaderWrapper instanceof NonClosingReaderWrapper; + } DirectoryReader reader = readerWrapper.apply(nonClosingReaderWrapper); if (reader != nonClosingReaderWrapper) { if (reader.getReaderCacheHelper() != openSearchDirectoryReader.getReaderCacheHelper()) { @@ -2377,6 +2424,7 @@ public void close(String reason, boolean flushEngine, boolean deleted) throws IO changeState(IndexShardState.CLOSED, reason); } } finally { + nonClosingReaderWrapperCache.clear(); final Indexer engine = this.currentEngineReference.getAndSet(null); try { if (engine != null && flushEngine) { @@ -6042,4 +6090,14 @@ public static T applyOnEngine(Indexer indexer, Function applier) throw new IllegalStateException("Cannot apply function on indexer " + indexer.getClass() + " directly on IndexShard"); } } + + // Visible for testing + Function<DirectoryReader, DirectoryReader> nonClosingReaderWrapperSupplier() { + return nonClosingReaderWrapperSupplier; + } + + // Visible for testing + ConcurrentHashMap<DirectoryReader, NonClosingReaderWrapper> nonClosingReaderWrapperCache() { + return nonClosingReaderWrapperCache; + } } diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/FetchSourceContext.java b/server/src/main/java/org/opensearch/search/fetch/subphase/FetchSourceContext.java index 2bdb311c0003f..3d1f42b7e1eb7 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/FetchSourceContext.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/FetchSourceContext.java @@ -140,61 +140,71 @@ public static FetchSourceContext parseFromRestRequest(RestRequest request) { public static FetchSourceContext fromXContent(XContentParser parser) throws IOException { XContentParser.Token token = parser.currentToken(); - boolean fetchSource = true; + switch (token) { + case XContentParser.Token.VALUE_BOOLEAN -> { + return parser.booleanValue() ?
FETCH_SOURCE : DO_NOT_FETCH_SOURCE; + } + case XContentParser.Token.VALUE_STRING -> { + String[] includes = new String[] { parser.text() }; + return new FetchSourceContext(true, includes, null); + } + case XContentParser.Token.START_ARRAY -> { + ArrayList list = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + list.add(parser.text()); + } + String[] includes = list.toArray(new String[0]); + return new FetchSourceContext(true, includes, null); + } + case XContentParser.Token.START_OBJECT -> { + return parseSourceObject(parser); + } + default -> { + throw new ParsingException( + parser.getTokenLocation(), + "Expected one of [" + + XContentParser.Token.VALUE_BOOLEAN + + ", " + + XContentParser.Token.START_OBJECT + + "] but found [" + + token + + "]" + ); + } + } + } + + private static FetchSourceContext parseSourceObject(XContentParser parser) throws IOException { + XContentParser.Token token = parser.currentToken(); String[] includes = Strings.EMPTY_ARRAY; String[] excludes = Strings.EMPTY_ARRAY; - if (token == XContentParser.Token.VALUE_BOOLEAN) { - fetchSource = parser.booleanValue(); - } else if (token == XContentParser.Token.VALUE_STRING) { - includes = new String[] { parser.text() }; - } else if (token == XContentParser.Token.START_ARRAY) { - ArrayList list = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - list.add(parser.text()); + String currentFieldName = null; + if (token != XContentParser.Token.START_OBJECT) { + throw new ParsingException( + parser.getTokenLocation(), + "Expected a " + XContentParser.Token.START_OBJECT + " but got a " + token + " in [" + parser.currentName() + "]." + ); + } + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + continue; // only field name is required in this iteration } - includes = list.toArray(new String[0]); - } else if (token == XContentParser.Token.START_OBJECT) { - String currentFieldName = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (token == XContentParser.Token.START_ARRAY) { + // process field value + switch (token) { + case XContentParser.Token.START_ARRAY -> { if (INCLUDES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - List includesList = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token == XContentParser.Token.VALUE_STRING) { - includesList.add(parser.text()); - } else { - throw new ParsingException( - parser.getTokenLocation(), - "Unknown key for a " + token + " in [" + currentFieldName + "].", - parser.getTokenLocation() - ); - } - } - includes = includesList.toArray(new String[0]); + includes = parseSourceArray(parser).toArray(new String[0]); } else if (EXCLUDES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - List excludesList = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token == XContentParser.Token.VALUE_STRING) { - excludesList.add(parser.text()); - } else { - throw new ParsingException( - parser.getTokenLocation(), - "Unknown key for a " + token + " in [" + currentFieldName + "].", - parser.getTokenLocation() - ); - } - } - excludes = excludesList.toArray(new String[0]); + excludes = parseSourceArray(parser).toArray(new String[0]); 
} else { throw new ParsingException( parser.getTokenLocation(), - "Unknown key for a " + token + " in [" + currentFieldName + "].", - parser.getTokenLocation() + "Unknown key for a " + token + " in [" + currentFieldName + "]." ); } - } else if (token == XContentParser.Token.VALUE_STRING) { + } + case XContentParser.Token.VALUE_STRING -> { if (INCLUDES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { includes = new String[] { parser.text() }; } else if (EXCLUDES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { @@ -202,32 +212,31 @@ public static FetchSourceContext fromXContent(XContentParser parser) throws IOEx } else { throw new ParsingException( parser.getTokenLocation(), - "Unknown key for a " + token + " in [" + currentFieldName + "].", - parser.getTokenLocation() + "Unknown key for a " + token + " in [" + currentFieldName + "]." ); } - } else { - throw new ParsingException( - parser.getTokenLocation(), - "Unknown key for a " + token + " in [" + currentFieldName + "].", - parser.getTokenLocation() - ); + } + default -> { + throw new ParsingException(parser.getTokenLocation(), "Unknown key for a " + token + " in [" + currentFieldName + "].")); } } - } else { - throw new ParsingException( - parser.getTokenLocation(), - "Expected one of [" - + XContentParser.Token.VALUE_BOOLEAN - + ", " - + XContentParser.Token.START_OBJECT - + "] but found [" - + token - + "]", - parser.getTokenLocation() - ); } - return new FetchSourceContext(fetchSource, includes, excludes); + return new FetchSourceContext(true, includes, excludes); + } + + private static List<String> parseSourceArray(XContentParser parser) throws IOException { + List<String> sourceArr = new ArrayList<>(); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + if (parser.currentToken() == XContentParser.Token.VALUE_STRING) { + sourceArr.add(parser.text()); + } else { + throw new ParsingException( + parser.getTokenLocation(), + "Unknown key for a " + parser.currentToken() + " in [" + parser.currentName() + "]." + ); + } + } + return sourceArr; } @Override
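A hypothetical round-trip of the object form through the refactored parser, assuming the standard x-content test plumbing (JsonXContent, NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION):

    try (XContentParser parser = JsonXContent.jsonXContent.createParser(
            NamedXContentRegistry.EMPTY,
            DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
            "{\"includes\":[\"user.*\"],\"excludes\":[\"user.ssn\"]}")) {
        parser.nextToken(); // position on START_OBJECT, as fromXContent expects
        FetchSourceContext context = FetchSourceContext.fromXContent(parser);
        // the object form always fetches source, with the parsed filters attached
        assert context.fetchSource();
        assert "user.*".equals(context.includes()[0]);
    }

The boolean, single-string, and array forms short-circuit in the switch above; only the object form reaches parseSourceObject and parseSourceArray.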
diff --git a/server/src/test/java/org/opensearch/action/bulk/BulkPrimaryExecutionContextTests.java b/server/src/test/java/org/opensearch/action/bulk/BulkPrimaryExecutionContextTests.java index 9745203e91586..19020f21d38bb 100644 --- a/server/src/test/java/org/opensearch/action/bulk/BulkPrimaryExecutionContextTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/BulkPrimaryExecutionContextTests.java @@ -40,75 +40,18 @@ import org.opensearch.action.index.IndexRequest; import org.opensearch.action.support.WriteRequest; import org.opensearch.action.update.UpdateRequest; -import org.opensearch.core.index.AppendOnlyIndexOperationRetryException; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.engine.Engine; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.translog.Translog; import org.opensearch.test.OpenSearchTestCase; -import java.util.ArrayList; - import static org.hamcrest.Matchers.equalTo; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class BulkPrimaryExecutionContextTests extends OpenSearchTestCase { - public void testAbortedSkipped() { - BulkShardRequest shardRequest = generateRandomRequest(); - - ArrayList<DocWriteRequest<?>> nonAbortedRequests = new ArrayList<>(); - for (BulkItemRequest request : shardRequest.items()) { - if (randomBoolean()) { - request.abort("index", new OpenSearchException("bla")); - } else { - nonAbortedRequests.add(request.request()); - } - } - - ArrayList<DocWriteRequest<?>> visitedRequests = new ArrayList<>(); - for (BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(shardRequest, null); context - .hasMoreOperationsToExecute();) { - visitedRequests.add(context.getCurrent()); - context.setRequestToExecute(context.getCurrent()); - // using failures prevents caring about types - context.markOperationAsExecuted(new Engine.IndexResult(new OpenSearchException("bla"), 1)); - context.markAsCompleted(context.getExecutionResult()); - } - - assertThat(visitedRequests, equalTo(nonAbortedRequests)); - } - - public void testAppendOnlyIndexOperationRetryException() { - BulkShardRequest shardRequest = generateRandomRequest(); - - final IndexShard primary = mock(IndexShard.class); - when(primary.shardId()).thenReturn(shardRequest.shardId()); - ArrayList<DocWriteRequest<?>> nonAbortedRequests = new ArrayList<>(); - for (BulkItemRequest request : shardRequest.items()) { - if (randomBoolean()) { - request.abort("index", new AppendOnlyIndexOperationRetryException("Indexing operation retried for append only indices")); - } else { - nonAbortedRequests.add(request.request()); - } - } - - ArrayList<DocWriteRequest<?>> visitedRequests = new ArrayList<>(); - for (BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(shardRequest, primary); context - .hasMoreOperationsToExecute();) { - visitedRequests.add(context.getCurrent()); - context.setRequestToExecute(context.getCurrent()); - // using failures prevents caring about types - context.markOperationAsExecuted( - new Engine.IndexResult(new AppendOnlyIndexOperationRetryException("Indexing operation retried for append only indices"), 1) - ); - context.markAsCompleted(context.getExecutionResult()); - } - - assertThat(visitedRequests, equalTo(nonAbortedRequests)); - } - private BulkShardRequest generateRandomRequest() { BulkItemRequest[] items = new BulkItemRequest[randomInt(20)]; for (int i = 0; i < items.length; i++) {
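The deleted tests above exercised in-place mutation (BulkItemRequest#abort and friends); the updated tests from here on reflect what appears to be an immutability change in this PR: an item carries its primary response from construction, and attaching responses to a shard request yields a clone rather than mutating the original. A sketch of the resulting idiom, with placeholder values:

    // 3-arg constructor from this diff: the response travels with the item
    BulkItemRequest item = new BulkItemRequest(0, new IndexRequest("index").id("id_0"), response);

    // setPrimaryResponses returns a cloned BulkShardRequest; callers must keep
    // the returned object (see testSetPrimaryResponses below)
    BulkShardRequest completed = original.setPrimaryResponses(primaryResponses);
    BulkItemResponse attached = completed.items()[0].primaryResponse();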
diff --git a/server/src/test/java/org/opensearch/action/bulk/BulkShardRequestTests.java b/server/src/test/java/org/opensearch/action/bulk/BulkShardRequestTests.java index 38dc2bf414ff2..f9ec125ffd5f6 100644 --- a/server/src/test/java/org/opensearch/action/bulk/BulkShardRequestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/BulkShardRequestTests.java @@ -35,10 +35,14 @@ import org.opensearch.action.DocWriteRequest; import org.opensearch.action.delete.DeleteRequest; import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.support.ActiveShardCount; import org.opensearch.action.support.WriteRequest.RefreshPolicy; import org.opensearch.action.update.UpdateRequest; import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.unit.TimeValue; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.tasks.TaskId; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; @@ -110,25 +114,65 @@ private BulkItemRequest[] generateBulkItemRequests(final int count) { items[i] = null; continue; } - final DocWriteRequest<?> request; - switch (randomFrom(DocWriteRequest.OpType.values())) { - case INDEX: - request = new IndexRequest("index").id("id_" + i); - break; - case CREATE: - request = new IndexRequest("index").id("id_" + i).create(true); - break; - case UPDATE: - request = new UpdateRequest("index", "id_" + i); - break; - case DELETE: - request = new DeleteRequest("index", "id_" + i); - break; - default: - throw new AssertionError("unknown type"); - } + final DocWriteRequest<?> request = switch (randomFrom(DocWriteRequest.OpType.values())) { + case INDEX -> new IndexRequest("index").id("id_" + i); + case CREATE -> new IndexRequest("index").id("id_" + i).create(true); + case UPDATE -> new UpdateRequest("index", "id_" + i); + case DELETE -> new DeleteRequest("index", "id_" + i); + }; items[i] = new BulkItemRequest(i, request); } return items; } + + public void testSetPrimaryResponses() { + final String index = randomSimpleString(random(), 1, 10); + final int count = between(2, 100); + final ShardId shardId = new ShardId(index, randomAlphaOfLength(10), randomIntBetween(0, 5)); + final RefreshPolicy refreshPolicy = randomFrom(RefreshPolicy.values()); + + final BulkItemRequest[] items = generateBulkItemRequests(count); + + final BulkShardRequest original = new BulkShardRequest(shardId, refreshPolicy, items); + + // Set random mutable properties from ReplicationRequest + original.timeout(TimeValue.timeValueMillis(randomLongBetween(1, 60000))); + original.waitForActiveShards(ActiveShardCount.from(randomIntBetween(0, 10))); + original.routedBasedOnClusterVersion(randomNonNegativeLong()); + original.setParentTask(new TaskId(randomAlphaOfLength(10), randomNonNegativeLong())); + + // Build primary responses for each item + final BulkItemResponse[] primaryResponses = new BulkItemResponse[count]; + for (int i = 0; i < count; i++) { + if (items[i] != null) { + primaryResponses[i] = new BulkItemResponse( + i, + items[i].request().opType(), + new IndexResponse(shardId, "id_" + i, 1, 1, 1, true) + ); + } + } + + final BulkShardRequest cloned = original.setPrimaryResponses(primaryResponses); + + // Verify all cloned properties match + assertEquals(original.shardId(), cloned.shardId()); + assertEquals(original.getRefreshPolicy(), cloned.getRefreshPolicy()); + assertEquals(original.timeout(), cloned.timeout()); + assertEquals(original.waitForActiveShards(), cloned.waitForActiveShards()); + assertEquals(original.routedBasedOnClusterVersion(),
cloned.routedBasedOnClusterVersion()); + assertEquals(original.getParentTask(), cloned.getParentTask()); + + // Verify items have primary responses attached + assertEquals(count, cloned.items().length); + for (int i = 0; i < count; i++) { + if (items[i] == null) { + assertNull(cloned.items()[i]); + } else { + assertEquals(items[i].id(), cloned.items()[i].id()); + assertEquals(items[i].request().opType(), cloned.items()[i].request().opType()); + assertSame(primaryResponses[i], cloned.items()[i].primaryResponse()); + } + } + } } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index 00bd99dd4b349..966910c8687bd 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -33,7 +33,6 @@ package org.opensearch.action.bulk; import org.opensearch.OpenSearchException; -import org.opensearch.OpenSearchStatusException; import org.opensearch.Version; import org.opensearch.action.DocWriteRequest; import org.opensearch.action.DocWriteResponse; @@ -112,8 +111,6 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.CoreMatchers.notNullValue; -import static org.hamcrest.Matchers.arrayWithSize; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.anyInt; @@ -159,7 +156,7 @@ public void testExecuteBulkIndexRequest() throws Exception { items[0] = primaryRequest; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -170,12 +167,13 @@ public void testExecuteBulkIndexRequest() throws Exception { listener -> {}, ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = context.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); // Translog should change, since there were no problems assertNotNull(context.getLocationToSync()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); @@ -190,7 +188,7 @@ public void testExecuteBulkIndexRequest() throws Exception { items[0] = primaryRequest; bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext secondContext = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -201,13 +199,14 @@ public void testExecuteBulkIndexRequest() throws Exception { listener -> {}, ASSERTING_DONE_LISTENER ); + completedRequest = secondContext.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); assertNull(secondContext.getLocationToSync()); - BulkItemRequest replicaRequest = bulkShardRequest.items()[0]; + BulkItemRequest replicaRequest = 
completedRequest.items()[0]; - primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + primaryResponse = replicaRequest.primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); @@ -222,77 +221,15 @@ public void testExecuteBulkIndexRequest() throws Exception { assertThat(failure.getCause().getMessage(), containsString("version conflict, document already exists (current version [1])")); assertThat(failure.getStatus(), equalTo(RestStatus.CONFLICT)); - assertThat(replicaRequest, equalTo(primaryRequest)); + assertEquals(primaryRequest.request(), replicaRequest.request()); + assertEquals(primaryRequest.index(), replicaRequest.index()); + assertEquals(primaryRequest.id(), replicaRequest.id()); // Assert that the document count is still 1 assertDocCount(shard, 1); closeShards(shard); } - public void testSkipBulkIndexRequestIfAborted() throws Exception { - IndexShard shard = newStartedShard(true); - - BulkItemRequest[] items = new BulkItemRequest[randomIntBetween(2, 5)]; - for (int i = 0; i < items.length; i++) { - DocWriteRequest writeRequest = new IndexRequest("index").id("id_" + i) - .source(Requests.INDEX_CONTENT_TYPE) - .opType(DocWriteRequest.OpType.INDEX); - items[i] = new BulkItemRequest(i, writeRequest); - } - BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - - // Preemptively abort one of the bulk items, but allow the others to proceed - BulkItemRequest rejectItem = randomFrom(items); - RestStatus rejectionStatus = randomFrom(RestStatus.BAD_REQUEST, RestStatus.CONFLICT, RestStatus.FORBIDDEN, RestStatus.LOCKED); - final OpenSearchStatusException rejectionCause = new OpenSearchStatusException("testing rejection", rejectionStatus); - rejectItem.abort("index", rejectionCause); - - final CountDownLatch latch = new CountDownLatch(1); - TransportShardBulkAction.performOnPrimary( - bulkShardRequest, - shard, - null, - threadPool::absoluteTimeInMillis, - new NoopMappingUpdatePerformer(), - listener -> {}, - ActionListener.runAfter(ActionTestUtils.assertNoFailureListener(result -> { - // since at least 1 item passed, the tran log location should exist, - assertThat(((WritePrimaryResult) result).location, notNullValue()); - // and the response should exist and match the item count - assertThat(result.finalResponseIfSuccessful, notNullValue()); - assertThat(result.finalResponseIfSuccessful.getResponses(), arrayWithSize(items.length)); - - // check each response matches the input item, including the rejection - for (int i = 0; i < items.length; i++) { - BulkItemResponse response = result.finalResponseIfSuccessful.getResponses()[i]; - assertThat(response.getItemId(), equalTo(i)); - assertThat(response.getIndex(), equalTo("index")); - assertThat(response.getId(), equalTo("id_" + i)); - assertThat(response.getOpType(), equalTo(DocWriteRequest.OpType.INDEX)); - if (response.getItemId() == rejectItem.id()) { - assertTrue(response.isFailed()); - assertThat(response.getFailure().getCause(), equalTo(rejectionCause)); - assertThat(response.status(), equalTo(rejectionStatus)); - } else { - assertFalse(response.isFailed()); - } - } - - // Check that the non-rejected updates made it to the shard - try { - assertDocCount(shard, items.length - 1); - closeShards(shard); - } catch (IOException e) { - throw new AssertionError(e); - } - }), latch::countDown), - threadPool, - Names.WRITE - ); - - latch.await(); - } - public void testExecuteBulkIndexRequestWithMappingUpdates() throws Exception { 
BulkItemRequest[] items = new BulkItemRequest[1]; @@ -313,7 +250,7 @@ public void testExecuteBulkIndexRequestWithMappingUpdates() throws Exception { ); when(shard.mapperService()).thenReturn(mock(MapperService.class)); - randomlySetIgnoredPrimaryResponse(items[0]); + items[0] = randomlySetIgnoredPrimaryResponse(items[0]); // Pretend the mappings haven't made it to the node yet BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); @@ -344,12 +281,13 @@ public void testExecuteBulkIndexRequestWithMappingUpdates() throws Exception { listener -> {}, ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = context.getBulkShardRequest(); // Verify that the shard "executed" the operation only once (1 for previous invocations plus // 1 for this execution) verify(shard, times(2)).applyIndexOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong(), anyLong(), anyBoolean()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); @@ -372,7 +310,7 @@ public void testExecuteBulkIndexRequestWithErrorWhileUpdatingMapping() throws Ex boolean errorOnWait = randomBoolean(); - randomlySetIgnoredPrimaryResponse(items[0]); + items[0] = randomlySetIgnoredPrimaryResponse(items[0]); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); final CountDownLatch latch = new CountDownLatch(1); @@ -394,11 +332,12 @@ public void onFailure(final Exception e) { ); latch.await(); assertFalse(context.hasMoreOperationsToExecute()); + BulkShardRequest completedRequest = context.getBulkShardRequest(); // Translog shouldn't be synced, as there were conflicting mappings assertThat(context.getLocationToSync(), nullValue()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); // Since this was not a conflict failure, the primary response // should be filled out with the failure information @@ -425,7 +364,7 @@ public void testExecuteBulkDeleteRequest() throws Exception { Translog.Location location = new Translog.Location(0, 0, 0); - randomlySetIgnoredPrimaryResponse(items[0]); + items[0] = randomlySetIgnoredPrimaryResponse(items[0]); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -437,13 +376,14 @@ public void testExecuteBulkDeleteRequest() throws Exception { ASSERTING_DONE_LISTENER ); assertFalse(context.hasMoreOperationsToExecute()); + BulkShardRequest completedRequest = context.getBulkShardRequest(); // Translog changes, even though the document didn't exist assertThat(context.getLocationToSync(), not(location)); - BulkItemRequest replicaRequest = bulkShardRequest.items()[0]; + BulkItemRequest replicaRequest = completedRequest.items()[0]; DocWriteRequest replicaDeleteRequest = replicaRequest.request(); - BulkItemResponse primaryResponse = replicaRequest.getPrimaryResponse(); + BulkItemResponse primaryResponse = replicaRequest.primaryResponse(); DeleteResponse response = primaryResponse.getResponse(); // Any version can be matched on replica @@ -472,7 +412,7 @@ public void testExecuteBulkDeleteRequest() throws Exception { location = context.getLocationToSync(); - 
randomlySetIgnoredPrimaryResponse(items[0]); + items[0] = randomlySetIgnoredPrimaryResponse(items[0]); context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -484,13 +424,14 @@ public void testExecuteBulkDeleteRequest() throws Exception { ASSERTING_DONE_LISTENER ); assertFalse(context.hasMoreOperationsToExecute()); + completedRequest = context.getBulkShardRequest(); // Translog changes, because the document was deleted assertThat(context.getLocationToSync(), not(location)); - replicaRequest = bulkShardRequest.items()[0]; + replicaRequest = completedRequest.items()[0]; replicaDeleteRequest = replicaRequest.request(); - primaryResponse = replicaRequest.getPrimaryResponse(); + primaryResponse = replicaRequest.primaryResponse(); response = primaryResponse.getResponse(); // Any version can be matched on replica @@ -535,7 +476,7 @@ public void testNoopUpdateRequest() throws Exception { BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -546,19 +487,19 @@ public void testNoopUpdateRequest() throws Exception { listener -> {}, ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = context.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); // Basically nothing changes in the request since it's a noop assertThat(context.getLocationToSync(), nullValue()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); assertThat(primaryResponse.getResponse(), equalTo(noopUpdateResponse)); assertThat(primaryResponse.getResponse().getResult(), equalTo(DocWriteResponse.Result.NOOP)); - assertThat(bulkShardRequest.items().length, equalTo(1)); - assertEquals(primaryRequest, bulkShardRequest.items()[0]); // check that bulk item was not mutated + assertThat(completedRequest.items().length, equalTo(1)); assertThat(primaryResponse.getResponse().getSeqNo(), equalTo(0L)); } @@ -590,7 +531,7 @@ public void testUpdateRequestWithFailure() throws Exception { BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -606,7 +547,8 @@ public void testUpdateRequestWithFailure() throws Exception { // Since this was not a conflict failure, the primary response // should be filled out with the failure information assertNull(context.getLocationToSync()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkShardRequest completedRequest = context.getBulkShardRequest(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); 
assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); @@ -647,7 +589,7 @@ public void testUpdateRequestWithConflictFailure() throws Exception { BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -658,10 +600,11 @@ public void testUpdateRequestWithConflictFailure() throws Exception { listener -> listener.onResponse(null), ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = context.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); assertNull(context.getLocationToSync()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); @@ -704,7 +647,7 @@ public void testUpdateRequestWithSuccess() throws Exception { BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -715,14 +658,15 @@ public void testUpdateRequestWithSuccess() throws Exception { listener -> {}, ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = context.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); // Check that the translog is successfully advanced assertThat(context.getLocationToSync(), equalTo(resultLocation)); - assertThat(bulkShardRequest.items()[0].request(), equalTo(updateResponse)); + assertThat(completedRequest.items()[0].request(), equalTo(updateResponse)); // Since this was not a conflict failure, the primary response // should be filled out with the failure information - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); @@ -760,7 +704,7 @@ public void testUpdateWithDelete() throws Exception { BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -771,12 +715,13 @@ public void testUpdateWithDelete() throws Exception { listener -> listener.onResponse(null), ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = 
context.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); // Check that the translog is successfully advanced assertThat(context.getLocationToSync(), equalTo(resultLocation)); - assertThat(bulkShardRequest.items()[0].request(), equalTo(updateResponse)); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + assertThat(completedRequest.items()[0].request(), equalTo(updateResponse)); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); @@ -797,7 +742,7 @@ public void testFailureDuringUpdateProcessing() throws Exception { BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); BulkPrimaryExecutionContext context = new BulkPrimaryExecutionContext(bulkShardRequest, shard); TransportShardBulkAction.executeBulkItemRequest( @@ -808,10 +753,11 @@ public void testFailureDuringUpdateProcessing() throws Exception { listener -> {}, ASSERTING_DONE_LISTENER ); + BulkShardRequest completedRequest = context.getBulkShardRequest(); assertFalse(context.hasMoreOperationsToExecute()); assertNull(context.getLocationToSync()); - BulkItemResponse primaryResponse = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = completedRequest.items()[0].primaryResponse(); assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); @@ -848,7 +794,7 @@ public void testFailedUpdatePreparationDoesNotTriggerRefresh() throws Exception BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.IMMEDIATE, items); - randomlySetIgnoredPrimaryResponse(primaryRequest); + items[0] = randomlySetIgnoredPrimaryResponse(primaryRequest); // Execute the bulk operation through performOnPrimary CountDownLatch latch = new CountDownLatch(1); @@ -1131,10 +1077,11 @@ public void testTranslogPositionToSync() throws Exception { public void testNoOpReplicationOnPrimaryDocumentFailure() throws Exception { final IndexShard shard = spy(newStartedShard(false)); - BulkItemRequest itemRequest = new BulkItemRequest(0, new IndexRequest("index").source(Requests.INDEX_CONTENT_TYPE)); final String failureMessage = "simulated primary failure"; final IOException exception = new IOException(failureMessage); - itemRequest.setPrimaryResponse( + BulkItemRequest itemRequest = new BulkItemRequest( + 0, + new IndexRequest("index").source(Requests.INDEX_CONTENT_TYPE), new BulkItemResponse( 0, randomFrom(DocWriteRequest.OpType.CREATE, DocWriteRequest.OpType.DELETE, DocWriteRequest.OpType.INDEX), @@ -1204,7 +1151,7 @@ public void testRetries() throws Exception { listener -> listener.onResponse(null), new LatchedActionListener<>(ActionTestUtils.assertNoFailureListener(result -> { assertThat(((WritePrimaryResult) result).location, equalTo(resultLocation)); - BulkItemResponse primaryResponse = result.replicaRequest().items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse = result.replicaRequest().items()[0].primaryResponse(); 
assertThat(primaryResponse.getItemId(), equalTo(0)); assertThat(primaryResponse.getId(), equalTo("id")); assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); @@ -1256,6 +1203,7 @@ public void testUpdateWithRetryOnConflict() throws IOException, InterruptedExcep BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items.toArray(BulkItemRequest[]::new)); + BulkShardRequest[] completedRequest = new BulkShardRequest[1]; final CountDownLatch latch = new CountDownLatch(1); Runnable runnable = () -> TransportShardBulkAction.performOnPrimary( bulkShardRequest, @@ -1267,8 +1215,9 @@ public void testUpdateWithRetryOnConflict() throws IOException, InterruptedExcep new LatchedActionListener<>(ActionTestUtils.assertNoFailureListener(result -> { assertEquals(nItems, result.replicaRequest().items().length); for (BulkItemRequest item : result.replicaRequest().items()) { - assertEquals(VersionConflictEngineException.class, item.getPrimaryResponse().getFailure().getCause().getClass()); + assertEquals(VersionConflictEngineException.class, item.primaryResponse().getFailure().getCause().getClass()); } + completedRequest[0] = result.replicaRequest(); }), latch), threadPool, Names.WRITE @@ -1280,8 +1229,8 @@ public void testUpdateWithRetryOnConflict() throws IOException, InterruptedExcep // timeout the request in 10 seconds if there is an infinite loop assertTrue(latch.await(10, TimeUnit.SECONDS)); - items.forEach(item -> { - assertEquals(item.getPrimaryResponse().getFailure().getCause().getClass(), VersionConflictEngineException.class); + for (BulkItemRequest item : completedRequest[0].items()) { + assertEquals(item.primaryResponse().getFailure().getCause().getClass(), VersionConflictEngineException.class); // this assertion is based on the assumption that all bulk item requests are updates and are hence calling // UpdateRequest::prepareRequest @@ -1291,7 +1240,7 @@ public void testUpdateWithRetryOnConflict() throws IOException, InterruptedExcep any(IndexShard.class), any(LongSupplier.class) ); - }); + } } public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { @@ -1344,10 +1293,11 @@ public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { when(shard.getFailedIndexResult(any(OpenSearchRejectedExecutionException.class), anyLong())).thenCallRealMethod(); when(shard.mapperService()).thenReturn(mock(MapperService.class)); - randomlySetIgnoredPrimaryResponse(items[0]); + items[0] = randomlySetIgnoredPrimaryResponse(items[0]); AtomicInteger updateCalled = new AtomicInteger(); + BulkShardRequest[] completedRequest = new BulkShardRequest[1]; final CountDownLatch latch = new CountDownLatch(1); TransportShardBulkAction.performOnPrimary( bulkShardRequest, @@ -1368,9 +1318,11 @@ public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { } }, listener -> listener.onResponse(null), - new LatchedActionListener<>(ActionTestUtils.assertNoFailureListener(result -> - // Assert that we still need to fsync the location that was successfully written - assertThat(((WritePrimaryResult) result).location, equalTo(resultLocation1))), latch), + new LatchedActionListener<>(ActionTestUtils.assertNoFailureListener(result -> { + // Assert that we still need to fsync the location that was successfully written + assertThat(((WritePrimaryResult) result).location, equalTo(resultLocation1)); + completedRequest[0] = result.replicaRequest(); + }), latch), rejectingThreadPool, Names.WRITE ); @@ -1380,7 +1332,7 @@ public 
void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { verify(shard, times(2)).applyIndexOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong(), anyLong(), anyBoolean()); - BulkItemResponse primaryResponse1 = bulkShardRequest.items()[0].getPrimaryResponse(); + BulkItemResponse primaryResponse1 = completedRequest[0].items()[0].primaryResponse(); assertThat(primaryResponse1.getItemId(), equalTo(0)); assertThat(primaryResponse1.getId(), equalTo("id")); assertThat(primaryResponse1.getOpType(), equalTo(DocWriteRequest.OpType.INDEX)); @@ -1388,13 +1340,13 @@ public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { assertThat(primaryResponse1.getResponse().status(), equalTo(RestStatus.CREATED)); assertThat(primaryResponse1.getResponse().getSeqNo(), equalTo(10L)); - BulkItemResponse primaryResponse2 = bulkShardRequest.items()[1].getPrimaryResponse(); + BulkItemResponse primaryResponse2 = completedRequest[0].items()[1].primaryResponse(); assertThat(primaryResponse2.getItemId(), equalTo(1)); assertThat(primaryResponse2.getId(), equalTo("id")); assertThat(primaryResponse2.getOpType(), equalTo(DocWriteRequest.OpType.INDEX)); assertTrue(primaryResponse2.isFailed()); assertNull(primaryResponse2.getResponse()); - assertEquals(primaryResponse2.status(), RestStatus.TOO_MANY_REQUESTS); + assertEquals(RestStatus.TOO_MANY_REQUESTS, primaryResponse2.status()); assertThat(primaryResponse2.getFailure().getCause(), instanceOf(OpenSearchRejectedExecutionException.class)); closeShards(shard); @@ -1570,10 +1522,12 @@ private TransportChannel createTransportChannel(final PlainActionFuture<BulkShardResponse> listener) { diff --git a/server/src/test/java/org/opensearch/action/search/SearchPhaseControllerTests.java b/server/src/test/java/org/opensearch/action/search/SearchPhaseControllerTests.java + throw new AssertionError("unexpected partial merge failure", e); + } + ); + + // No results consumed — all shards failed. + SearchPhaseController.ReducedQueryPhase result = consumer.reduce(); + assertTrue("result should be marked as empty", result.isEmptyResult); + assertNull("aggregations should be null when no shards returned results", result.aggregations); + assertEquals(0, result.totalHits.value()); + assertEquals(0, result.sortedTopDocs.scoreDocs.length); + } + private static class ThrowingSearchProgressListener extends SearchProgressListener { private final AtomicInteger onQueryResult = new AtomicInteger(0); private final AtomicInteger onPartialReduce = new AtomicInteger(0); diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/BalanceConfigurationTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/BalanceConfigurationTests.java index e53ca2a487840..93b9c5ea0e94f 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/BalanceConfigurationTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/BalanceConfigurationTests.java @@ -69,7 +69,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import static org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING; @@ -324,7 +323,7 @@ public void testPrimaryBalanceSolvedWithPreferPrimaryRebalanceSetting() { logger.info(ShardAllocations.printShardDistribution(clusterState)); try { verifyPrimaryBalance(clusterState, buffer); - } catch (Exception e) { + } catch (AssertionError | Exception e) { balanceFailed++; logger.info("Unexpected assertion failure"); } @@ -647,23 +646,21 @@ private void verifyPerIndexPrimaryBalance(ClusterState currentState) { } } - private void verifyPrimaryBalance(ClusterState clusterState, float buffer) throws Exception { -
assertBusy(() -> { - RoutingNodes nodes = clusterState.getRoutingNodes(); - int totalPrimaryShards = 0; - for (final IndexRoutingTable index : clusterState.getRoutingTable().indicesRouting().values()) { - totalPrimaryShards += index.primaryShardsActive(); - } - final int avgPrimaryShardsPerNode = (int) Math.ceil(totalPrimaryShards * 1f / clusterState.getRoutingNodes().size()); - for (RoutingNode node : nodes) { - final int primaryCount = node.shardsWithState(STARTED) - .stream() - .filter(ShardRouting::primary) - .collect(Collectors.toList()) - .size(); - assertTrue(primaryCount <= (avgPrimaryShardsPerNode * (1 + buffer))); - } - }, 60, TimeUnit.SECONDS); + private static void verifyPrimaryBalance(ClusterState clusterState, float buffer) { + RoutingNodes nodes = clusterState.getRoutingNodes(); + int totalPrimaryShards = 0; + for (final IndexRoutingTable index : clusterState.getRoutingTable().indicesRouting().values()) { + totalPrimaryShards += index.primaryShardsActive(); + } + final int avgPrimaryShardsPerNode = (int) Math.ceil(totalPrimaryShards * 1f / clusterState.getRoutingNodes().size()); + for (RoutingNode node : nodes) { + final int primaryCount = node.shardsWithState(STARTED) + .stream() + .filter(ShardRouting::primary) + .collect(Collectors.toList()) + .size(); + assertTrue(primaryCount <= (avgPrimaryShardsPerNode * (1 + buffer))); + } } public void testShardBalance() { diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index 57ba262b790ea..bcab71432dd1d 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -85,6 +85,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.InternalEngineFactory; import org.opensearch.index.engine.InternalEngineTests; +import org.opensearch.index.engine.dataformat.DataFormatRegistry; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.mapper.ParsedDocument; import org.opensearch.index.mapper.Uid; @@ -282,7 +283,7 @@ private IndexService newIndexService(IndexModule module) throws IOException { null, () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE, mockClusterMergeSchedulerConfig, - null + (DataFormatRegistry) null ); } diff --git a/server/src/test/java/org/opensearch/index/mapper/BinaryFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/BinaryFieldMapperTests.java index 87b5ad3434944..12b40a36947e3 100644 --- a/server/src/test/java/org/opensearch/index/mapper/BinaryFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/BinaryFieldMapperTests.java @@ -32,8 +32,12 @@ package org.opensearch.index.mapper; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.compress.CompressorRegistry; @@ -135,4 +139,87 @@ public void testStoredValue() throws IOException { assertEquals(new BytesArray(value), originalValue); } } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatBinaryValue() throws Exception { + Settings 
pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "binary").field("doc_values", true).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + byte[] testValue = new byte[] { 1, 2, 3 }; + String base64Value = java.util.Base64.getEncoder().encodeToString(testValue); + mapper.parse(source(b -> b.field("field", base64Value)), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Expected binary field to be captured", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatBinaryNullSkipped() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "binary").field("doc_values", true).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no binary field to be captured for null value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: binary value — both paths produce a field + byte[] testValue = new byte[] { 1, 2, 3 }; + String base64Value = java.util.Base64.getEncoder().encodeToString(testValue); + assertBinaryLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "binary").field("doc_values", true).endObject()), + b -> b.field("field", base64Value), + "field", + true + ); + + // Scenario 2: null value — no field produced + assertBinaryLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "binary").field("doc_values", true).endObject()), + b -> b.nullField("field"), + "field", + false + ); + } + + private void assertBinaryLuceneAndPluggablePathsEquivalent( + Settings pluggableSettings, + XContentBuilder mappingBuilder, + CheckedConsumer<XContentBuilder, IOException> sourceBuilder, + String fieldName, + boolean expectField + ) throws IOException { + // Lucene path + DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder); + ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder)); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName); + + // Pluggable path + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(sourceBuilder), docInput); + + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName)); + + if (!expectField) { + assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length); + assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField); + } else { +
assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0); + assertTrue("Pluggable path should capture field '" + fieldName + "'", pluggableHasField); + } + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java index 894e76d0ea442..4386a3ccc06e6 100644 --- a/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java @@ -47,6 +47,9 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.opensearch.common.Booleans; +import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; @@ -314,4 +317,133 @@ private org.apache.lucene.document.Document createDocument(String value, boolean } return doc; } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatBooleanTrue() throws IOException { + Settings settings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(settings, fieldMapping(this::minimalMapping)); + + CapturingDocumentInput capturingDocInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", true)), capturingDocInput); + + assertTrue( + capturingDocInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && Boolean.TRUE.equals(e.getValue())) + ); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatBooleanFalse() throws IOException { + Settings settings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(settings, fieldMapping(this::minimalMapping)); + + CapturingDocumentInput capturingDocInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", false)), capturingDocInput); + + assertTrue( + capturingDocInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && Boolean.FALSE.equals(e.getValue())) + ); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatBooleanNullSkipped() throws IOException { + Settings settings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(settings, fieldMapping(this::minimalMapping)); + + CapturingDocumentInput capturingDocInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), capturingDocInput); + + assertTrue(capturingDocInput.getCapturedFields().stream().noneMatch(e -> e.getKey().name().equals("field"))); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatBooleanNullValueConfigured() throws IOException { + Settings settings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + settings, + mapping(b -> b.startObject("field").field("type", "boolean").field("null_value", 
true).endObject()) + ); + CapturingDocumentInput capturingDocInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), capturingDocInput); + + assertTrue( + capturingDocInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && Boolean.TRUE.equals(e.getValue())) + ); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: true value + assertBooleanLuceneAndPluggablePathsEquivalent( + pluggableSettings, + fieldMapping(this::minimalMapping), + b -> b.field("field", true), + "field", + Boolean.TRUE + ); + + // Scenario 2: false value + assertBooleanLuceneAndPluggablePathsEquivalent( + pluggableSettings, + fieldMapping(this::minimalMapping), + b -> b.field("field", false), + "field", + Boolean.FALSE + ); + + // Scenario 3: null value — no field produced + assertBooleanLuceneAndPluggablePathsEquivalent( + pluggableSettings, + fieldMapping(this::minimalMapping), + b -> b.nullField("field"), + "field", + null + ); + + // Scenario 4: null_value configured — substitution kicks in + assertBooleanLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "boolean").field("null_value", true).endObject()), + b -> b.nullField("field"), + "field", + Boolean.TRUE + ); + } + + private void assertBooleanLuceneAndPluggablePathsEquivalent( + Settings pluggableSettings, + XContentBuilder mappingBuilder, + CheckedConsumer<XContentBuilder, IOException> sourceBuilder, + String fieldName, + Boolean expectedValue + ) throws IOException { + // Lucene path + DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder); + ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder)); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName); + + // Pluggable path + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(sourceBuilder), docInput); + + if (expectedValue == null) { + assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName)); + assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField); + } else { + assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0); + boolean pluggableFound = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals(fieldName) && expectedValue.equals(e.getValue())); + assertTrue("Pluggable path should capture field '" + fieldName + "' with value '" + expectedValue + "'", pluggableFound); + } + } }
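Every pluggable-format test in these mapper files drives parsing through a CapturingDocumentInput helper whose source is not part of the hunks shown here. The following is a minimal sketch of what such a helper could look like, inferred from how the tests use it (a getCapturedFields() accessor returning entries whose key is the MappedFieldType and whose value is whatever the mapper handed to addField) and from the DocumentInput mocks that appear later in this diff; the body below is an assumption, not the PR's actual implementation:

    import java.util.AbstractMap;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Map;

    import org.opensearch.index.engine.dataformat.DocumentInput;
    import org.opensearch.index.mapper.MappedFieldType;

    // Hypothetical reconstruction of the test helper used throughout these tests.
    class CapturingDocumentInput implements DocumentInput<List<Map.Entry<MappedFieldType, Object>>> {
        private final List<Map.Entry<MappedFieldType, Object>> captured = new ArrayList<>();

        // Accessor the new tests assert against.
        public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() {
            return Collections.unmodifiableList(captured);
        }

        @Override
        public List<Map.Entry<MappedFieldType, Object>> getFinalInput() {
            return getCapturedFields();
        }

        @Override
        public void addField(MappedFieldType fieldType, Object value) {
            // Record exactly what the mapper handed over, in arrival order.
            captured.add(new AbstractMap.SimpleEntry<>(fieldType, value));
        }

        @Override
        public void setRowId(String rowIdFieldName, long rowId) {
            // Row ids are irrelevant to these assertions, so they are ignored.
        }

        @Override
        public void close() {}
    }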
diff --git a/server/src/test/java/org/opensearch/index/mapper/CompletionFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/CompletionFieldMapperTests.java index b1785f5d7b14c..99a75d58496cb 100644 --- a/server/src/test/java/org/opensearch/index/mapper/CompletionFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/CompletionFieldMapperTests.java @@ -46,7 +46,9 @@ import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.Fuzziness; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.common.Strings; import org.opensearch.core.common.bytes.BytesReference; @@ -791,4 +793,15 @@ protected V featureValueOf(T actual) { }; } + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatCompletionNoOp() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(this::minimalMapping)); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "suggestion")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("CompletionFieldMapper pluggable format is no-op, should not capture", hasField); + } + } diff --git a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java index 9fcadcfb36b69..d1c79afcb568f 100644 --- a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java @@ -19,6 +19,7 @@ import org.opensearch.common.CheckedConsumer; import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.common.bytes.BytesReference; @@ -164,4 +165,33 @@ private ConstantKeywordFieldMapper getMapper(FieldMapper.CopyTo copyTo) { mapper.copyTo = copyTo; return mapper; } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatConstantKeywordValidates() throws Exception { + indexService = createIndexWithSimpleMappings( + "test-pluggable", + Settings.builder().put("index.pluggable.dataformat.enabled", true).build(), + "field", + "type=constant_keyword,value=foo" + ); + ConstantKeywordFieldMapper mapper = (ConstantKeywordFieldMapper) indexService.mapperService() + .documentMapper() + .mappers() + .getMapper(FIELD_NAME); + // parseCreateFieldForPluggableFormat only validates the constant value; it never calls addField, + // so there is nothing to capture from a DocumentInput here. + assertNotNull(mapper); + // Exercise the validation through a full document parse: a valid value must not throw. + indexService.mapperService() + .documentMapper() + .parse( + new SourceToParse( + "test-pluggable", + "1", + BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("field", "foo").endObject()), + MediaTypeRegistry.JSON + ) + ); + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapperTests.java index 1a4a4210fcc9e..e9668fdcf6274 100644 --- a/server/src/test/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/ContextAwareGroupingFieldMapperTests.java @@ -150,6 +150,17 @@ public void
testIngestAttemptThrowsException() { assertTrue(e.getMessage().contains("context_aware_grouping cannot be ingested in the document")); } + public void testPluggableDataFormatIngestThrowsException() { + ContextAwareGroupingFieldType fieldType = new ContextAwareGroupingFieldType(Collections.emptyList(), null); + ContextAwareGroupingFieldMapper mapper = new ContextAwareGroupingFieldMapper( + "context_aware_grouping", + fieldType, + new ContextAwareGroupingFieldMapper.Builder("context_aware_grouping") + ); + MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parseCreateFieldForPluggableFormat(null)); + assertTrue(e.getMessage().contains("context_aware_grouping cannot be ingested in the document")); + } + public void testContextAwareFieldMapperWithDerivedSource() throws IOException { ContextAwareGroupingFieldType fieldType = new ContextAwareGroupingFieldType(Collections.emptyList(), null); ContextAwareGroupingFieldMapper mapper = new ContextAwareGroupingFieldMapper( diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java index e03cbfdd5bec4..54ac0a0d22f95 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java @@ -46,6 +46,7 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; import org.opensearch.common.util.FeatureFlags; @@ -936,4 +937,89 @@ public void testSkipListIntegrationFieldBehaviorConsistency() throws IOException ); assertEquals("Expected timestamp should match", expectedTimestamp, fieldsEnabled[0].numericValue().longValue()); } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatDateValue() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "date").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "2025-02-18T06:00:00.000Z")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals(TEST_TIMESTAMP)); + assertTrue("Expected timestamp for date field", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatDateNullSkipped() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "date").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasDateField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasDateField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) 
+ public void testPluggablePathEquivalenceWithLucenePath() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: date value + assertDateLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "date").endObject()), + b -> b.field("field", "2025-02-18T06:00:00.000Z"), + "field", + TEST_TIMESTAMP + ); + + // Scenario 2: null value — no field produced + assertDateLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "date").endObject()), + b -> b.nullField("field"), + "field", + null + ); + } + + private void assertDateLuceneAndPluggablePathsEquivalent( + Settings pluggableSettings, + XContentBuilder mappingBuilder, + CheckedConsumer<XContentBuilder, IOException> sourceBuilder, + String fieldName, + Long expectedValue + ) throws IOException { + // Lucene path + DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder); + ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder)); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName); + + // Pluggable path + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(sourceBuilder), docInput); + + if (expectedValue == null) { + assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName)); + assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField); + } else { + assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0); + assertEquals(expectedValue.longValue(), luceneFields[0].numericValue().longValue()); + + boolean pluggableFound = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals(fieldName) && e.getValue().equals(expectedValue)); + assertTrue("Pluggable path should capture field '" + fieldName + "' with value '" + expectedValue + "'", pluggableFound); + } + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperTests.java index e3e2b978369ff..39e5f491569fa 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperTests.java @@ -114,4 +114,17 @@ public void testFieldInDerivedAndProperties() throws IOException { // TODO TESTCASE: testWithFieldInSource() (derived field with that field present in source) // This is more checking search behavior so may need to revisit this after query implementation + public void testPluggableDataFormatDerivedFieldThrows() throws IOException { + DocumentMapper mapper = createDocumentMapper(topMapping(b -> { + b.startObject("derived"); + b.startObject("derived_field"); + b.field("type", "keyword"); + b.startObject("script").field("source", "emit(params._source.field)").field("lang", "painless").endObject(); + b.endObject(); + b.endObject(); + })); + DerivedFieldMapper derivedMapper = (DerivedFieldMapper) mapper.mappers().getMapper("derived_field"); + assertNotNull(derivedMapper); + expectThrows(UnsupportedOperationException.class, () -> derivedMapper.parseCreateFieldForPluggableFormat(null));
+ } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DocumentFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DocumentFieldMapperTests.java index c74b5745cd83f..1bb7d0b3bb427 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DocumentFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DocumentFieldMapperTests.java @@ -104,6 +104,9 @@ static class FakeFieldMapper extends ParametrizedFieldMapper { @Override protected void parseCreateField(ParseContext context) {} + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) {} + @Override protected String contentType() { return null; diff --git a/server/src/test/java/org/opensearch/index/mapper/DocumentMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DocumentMapperTests.java index fa6ef72552faf..0e410c0d791c5 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DocumentMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DocumentMapperTests.java @@ -40,6 +40,7 @@ import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.MapperService.MergeReason; import java.io.IOException; @@ -54,6 +55,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; public class DocumentMapperTests extends MapperServiceTestCase { @@ -291,4 +293,91 @@ public void testMergeMetaForIndexTemplate() throws IOException { expected = Map.of("field", "value", "object", Map.of("field1", "value1", "field2", "new_value", "field3", "value3")); assertThat(mergedMapper.meta(), equalTo(expected)); } + + public void testParseWithoutDocumentInputReturnsNullDocumentInput() throws IOException { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value"))); + + assertThat(doc.getDocumentInput(), nullValue()); + } + + public void testParseWithDocumentInputPropagatesInput() throws IOException { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + DocumentInput<Map<String, Object>> mockInput = new MockDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); + } + + public void testParseWithNullDocumentInputExplicitly() throws IOException { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")), null); + + assertThat(doc.getDocumentInput(), nullValue()); + } + + public void testParseWithDocumentInputMultipleFields() throws IOException { + DocumentMapper mapper = createDocumentMapper(mapping(b -> { + b.startObject("name").field("type", "text").endObject(); + b.startObject("age").field("type", "integer").endObject(); + })); + DocumentInput<Map<String, Object>> mockInput = new MockDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> { + b.field("name", "test"); + b.field("age", 25); + }), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); +
assertThat(doc.rootDoc().getField("name"), notNullValue()); + assertThat(doc.rootDoc().getField("age"), notNullValue()); + } + + public void testParseWithDocumentInputNestedObject() throws IOException { + DocumentMapper mapper = createDocumentMapper(mapping(b -> { + b.startObject("obj"); + { + b.startObject("properties"); + { + b.startObject("field").field("type", "text").endObject(); + } + b.endObject(); + } + b.endObject(); + })); + DocumentInput> mockInput = new MockDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> { + b.startObject("obj"); + b.field("field", "value"); + b.endObject(); + }), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); + } + + private static class MockDocumentInput implements DocumentInput> { + private final Map fields = new HashMap<>(); + + @Override + public Map getFinalInput() { + return Collections.unmodifiableMap(fields); + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + fields.put(fieldType != null ? fieldType.name() : "field_" + fields.size(), value); + } + + @Override + public void setRowId(String rowIdFieldName, long rowId) { + fields.put(rowIdFieldName, rowId); + } + + @Override + public void close() {} + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java index b99320410b99e..d94d81c59f81c 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java @@ -42,6 +42,7 @@ import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.ParseContext.Document; import org.opensearch.plugins.Plugin; @@ -52,7 +53,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static java.util.Collections.singletonList; import static org.opensearch.test.StreamsUtils.copyToBytesFromClasspath; @@ -62,6 +65,8 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; public class DocumentParserTests extends MapperServiceTestCase { @@ -3574,4 +3579,86 @@ public void testGeoPointArrayWithMultipleCopyTo() throws Exception { assertNotNull(copy2Fields); assertTrue(copy2Fields.length > 0); } + + public void testParseDocumentWithDocumentInputPropagated() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + DocumentInput> mockInput = new TestDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); + assertNotNull(doc.rootDoc().getField("field")); + } + + public void testParseDocumentWithNullDocumentInput() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")), null); + + assertThat(doc.getDocumentInput(), nullValue()); + } + + public void 
diff --git a/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java index b99320410b99e..d94d81c59f81c 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DocumentParserTests.java @@ -42,6 +42,7 @@ import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.mapper.ParseContext.Document; import org.opensearch.plugins.Plugin; @@ -52,7 +53,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static java.util.Collections.singletonList; import static org.opensearch.test.StreamsUtils.copyToBytesFromClasspath; @@ -62,6 +65,8 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; public class DocumentParserTests extends MapperServiceTestCase { @@ -3574,4 +3579,86 @@ public void testGeoPointArrayWithMultipleCopyTo() throws Exception { assertNotNull(copy2Fields); assertTrue(copy2Fields.length > 0); } + + public void testParseDocumentWithDocumentInputPropagated() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + DocumentInput<Map<String, Object>> mockInput = new TestDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); + assertNotNull(doc.rootDoc().getField("field")); + } + + public void testParseDocumentWithNullDocumentInput() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")), null); + + assertThat(doc.getDocumentInput(), nullValue()); + } + + public void testParseDocumentWithoutDocumentInputDefaultsToNull() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> b.startObject("field").field("type", "text").endObject())); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value"))); + + assertThat(doc.getDocumentInput(), nullValue()); + } + + public void testParseDocumentWithDocumentInputAndDynamicMapping() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + DocumentInput<Map<String, Object>> mockInput = new TestDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> b.field("dynamic_field", "value")), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); + assertNotNull(doc.dynamicMappingsUpdate()); + } + + public void testParseDocumentWithDocumentInputAndNestedFields() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> { + b.startObject("obj"); + { + b.startObject("properties"); + { + b.startObject("inner").field("type", "keyword").endObject(); + } + b.endObject(); + } + b.endObject(); + })); + DocumentInput<Map<String, Object>> mockInput = new TestDocumentInput(); + + ParsedDocument doc = mapper.parse(source(b -> { + b.startObject("obj"); + b.field("inner", "test"); + b.endObject(); + }), mockInput); + + assertThat(doc.getDocumentInput(), sameInstance(mockInput)); + assertNotNull(doc.rootDoc().getField("obj.inner")); + } + + private static class TestDocumentInput implements DocumentInput<Map<String, Object>> { + private final Map<String, Object> fields = new HashMap<>(); + + @Override + public Map<String, Object> getFinalInput() { + return Collections.unmodifiableMap(fields); + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + fields.put(fieldType != null ? fieldType.name() : "field_" + fields.size(), value); + } + + @Override + public void setRowId(String rowIdFieldName, long rowId) { + fields.put(rowIdFieldName, rowId); + } + + @Override + public void close() {} + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/ExternalMapper.java b/server/src/test/java/org/opensearch/index/mapper/ExternalMapper.java index 49b38f828f837..00c99cbd29d26 100644 --- a/server/src/test/java/org/opensearch/index/mapper/ExternalMapper.java +++ b/server/src/test/java/org/opensearch/index/mapper/ExternalMapper.java @@ -210,6 +210,11 @@ protected void parseCreateField(ParseContext context) { throw new UnsupportedOperationException(); } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) { + throw new UnsupportedOperationException(); + } + @Override public Iterator<Mapper> iterator() { return Iterators.concat(super.iterator(), Arrays.asList(binMapper, boolMapper, pointMapper, shapeMapper, stringMapper).iterator());
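ExternalMapper above simply rejects the pluggable path, while FakeStringFieldMapper in the next file shows the intended dual-path pattern for mappers that do support it: extract the value once, then feed either the Lucene document or the DocumentInput. Distilled from that change (FIELD_TYPE and parseValue are the fake mapper's own members):

    // Dual-path pattern demonstrated by the FakeStringFieldMapper change below.
    @Override
    protected void parseCreateField(ParseContext context) throws IOException {
        String value = parseValue(context);   // shared value extraction
        if (value == null) {
            return;                           // nulls produce no field on either path
        }
        context.doc().add(new Field(fieldType().name(), value, FIELD_TYPE));
    }

    @Override
    protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException {
        String value = parseValue(context);   // identical extraction and null handling
        if (value == null) {
            return;
        }
        context.documentInput().addField(fieldType(), value);
    }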
diff --git a/server/src/test/java/org/opensearch/index/mapper/FakeStringFieldMapper.java b/server/src/test/java/org/opensearch/index/mapper/FakeStringFieldMapper.java index 9da53b36e11d6..9e5153058d67e 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FakeStringFieldMapper.java +++ b/server/src/test/java/org/opensearch/index/mapper/FakeStringFieldMapper.java @@ -107,19 +107,28 @@ protected FakeStringFieldMapper(MappedFieldType mappedFieldType, MultiFields mul @Override protected void parseCreateField(ParseContext context) throws IOException { - String value; - if (context.externalValueSet()) { - value = context.externalValue().toString(); - } else { - value = context.parser().textOrNull(); + String value = parseValue(context); + if (value == null) { + return; } + Field field = new Field(fieldType().name(), value, FIELD_TYPE); + context.doc().add(field); + } + @Override + protected void parseCreateFieldForPluggableFormat(ParseContext context) throws IOException { + String value = parseValue(context); if (value == null) { return; } + context.documentInput().addField(fieldType(), value); + } - Field field = new Field(fieldType().name(), value, FIELD_TYPE); - context.doc().add(field); + private String parseValue(ParseContext context) throws IOException { + if (context.externalValueSet()) { + return context.externalValue().toString(); + } + return context.parser().textOrNull(); } @Override
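One detail worth calling out before the flat_object tests below: what the pluggable path captures for a flat_object is not the leaf values but the flattened key paths, encoded as BytesRef terms, which is consistent with flat_object's path-oriented indexing. Expected captures per the assertions that follow (illustrative only):

    // Input document:           {"field": {"a": "1", "b": "2"}}
    // Captured (field, value):  ("field", new BytesRef("field.a"))
    //                           ("field", new BytesRef("field.b"))
    //
    // Deeply nested input:      {"field": {"a": {"b": "val"}}}
    // Captured values include:  new BytesRef("field.a") and new BytesRef("field.b"),
    //                           i.e. one entry per key, not a single "field.a.b" leaf.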
diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index adb2c8b1ffe2a..b788a024c011e 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -17,6 +17,8 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.opensearch.common.TriFunction; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.set.Sets; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.json.JsonXContent; @@ -28,6 +30,7 @@ import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.Set; import static org.opensearch.index.mapper.FlatObjectFieldMapper.CONTENT_TYPE; @@ -417,4 +420,111 @@ protected void registerParameters(ParameterChecker checker) throws IOException { // In the future we will want to make sure parameter updates are covered. } + private Settings pluggableSettings() { + return Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatSimpleObject() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "flat_object").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + String json = "{\"field\":{\"foo\":\"bar\"}}"; + mapper.parse(source(json), docInput); + + List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields(); + boolean found = captured.stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals(new BytesRef("field.foo"))); + assertTrue("Expected flat_object path field captured", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValue() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "flat_object").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNonObjectThrows() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "flat_object").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + Exception e = expectThrows( + MapperParsingException.class, + () -> mapper.parse(source(b -> b.field("field", "string_value")), docInput) + ); + assertThat(e.getCause().getMessage(), org.hamcrest.Matchers.containsString("unexpected token")); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNestedObject() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "flat_object").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + String json = "{\"field\":{\"a\":\"1\",\"b\":\"2\"}}"; + mapper.parse(source(json), docInput); + + List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields(); + assertEquals(2, captured.size()); + Set<BytesRef> values = Set.of(captured.get(0).getValue(), captured.get(1).getValue()); + assertTrue(values.contains(new BytesRef("field.a"))); + assertTrue(values.contains(new BytesRef("field.b"))); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatWithNullFieldsInObject() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "flat_object").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + String json = "{\"field\":{\"name\":null,\"age\":3}}"; + mapper.parse(source(json), docInput); + + List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields(); + assertEquals(1, captured.size()); + assertEquals(new BytesRef("field.age"), captured.get(0).getValue()); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatDeepNestedObject() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "flat_object").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + String json = "{\"field\":{\"a\":{\"b\":\"val\"}}}"; + mapper.parse(source(json), docInput); + + List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields(); + assertTrue(captured.stream().anyMatch(e -> e.getValue().equals(new BytesRef("field.a")))); + assertTrue(captured.stream().anyMatch(e -> e.getValue().equals(new BytesRef("field.b")))); + } + + public void testDefaultsDoNotUseDocumentInput() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + } }
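The binary, boolean, date, and ip test files in this diff each carry a nearly identical assert*LuceneAndPluggablePathsEquivalent helper. If those are ever consolidated into the shared test base class, the common presence/absence check factors out roughly as follows (a sketch, not part of the PR; the per-type value assertions would still need type-specific overloads):

    // Generic Lucene-vs-pluggable equivalence check, factored from the four
    // per-mapper helpers elsewhere in this diff.
    private void assertLuceneAndPluggablePathsEquivalent(
        Settings pluggableSettings,
        XContentBuilder mappingBuilder,
        CheckedConsumer<XContentBuilder, IOException> sourceBuilder,
        String fieldName,
        boolean expectField
    ) throws IOException {
        // Lucene path: parse with the plain mapping and inspect the Lucene document.
        DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder);
        ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder));
        IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName);

        // Pluggable path: parse the same source with the pluggable setting enabled.
        DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder);
        CapturingDocumentInput docInput = new CapturingDocumentInput();
        pluggableMapper.parse(source(sourceBuilder), docInput);
        boolean pluggableHasField = docInput.getCapturedFields()
            .stream()
            .anyMatch(e -> e.getKey().name().equals(fieldName));

        // Both paths must agree on whether the field was produced at all.
        assertEquals("Lucene path for '" + fieldName + "'", expectField, luceneFields.length > 0);
        assertEquals("Pluggable path for '" + fieldName + "'", expectField, pluggableHasField);
    }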
diff --git a/server/src/test/java/org/opensearch/index/mapper/GeoShapeFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/GeoShapeFieldMapperTests.java index 016862e3ffabc..304edb2beb8ce 100644 --- a/server/src/test/java/org/opensearch/index/mapper/GeoShapeFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/GeoShapeFieldMapperTests.java @@ -255,4 +255,10 @@ public void testGeoShapeArrayParsing() throws Exception { protected boolean supportsMeta() { return false; } + + public void testPluggableDataFormatGeoShapeThrows() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + GeoShapeFieldMapper geoMapper = (GeoShapeFieldMapper) mapper.mappers().getMapper("field"); + expectThrows(UnsupportedOperationException.class, () -> geoMapper.parseCreateFieldForPluggableFormat(null)); + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/HllFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/HllFieldMapperTests.java index 9dab7d1162ce3..8eb9c6a526702 100644 --- a/server/src/test/java/org/opensearch/index/mapper/HllFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/HllFieldMapperTests.java @@ -10,8 +10,10 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.settings.Settings; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.BitMixer; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.search.aggregations.metrics.AbstractHyperLogLog; @@ -20,6 +22,8 @@ import java.io.IOException; import java.util.Arrays; +import java.util.List; +import java.util.Map; import static org.opensearch.search.DocValueFormat.BINARY; import static org.hamcrest.Matchers.containsString; @@ -765,4 +769,61 @@ public void testValueFetcherWithDocValues() throws IOException { // 2. Sketches can be reconstructed from doc values // 3. DocValueFormat is BINARY } + + private Settings pluggableSettings() { + return Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + } + + private byte[] createValidSketchBytes() throws IOException { + HyperLogLogPlusPlus sketch = new HyperLogLogPlusPlus(HyperLogLogPlusPlus.DEFAULT_PRECISION, BigArrays.NON_RECYCLING_INSTANCE, 1); + try { + sketch.collect(0, 1L); + BytesStreamOutput out = new BytesStreamOutput(); + sketch.writeTo(0, out); + BytesRef ref = out.bytes().toBytesRef(); + return Arrays.copyOfRange(ref.bytes, ref.offset, ref.offset + ref.length); + } finally { + sketch.close(); + } + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatValidSketch() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "hll").endObject()) + ); + byte[] sketchBytes = createValidSketchBytes(); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", sketchBytes)), docInput); + + List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields(); + boolean found = captured.stream().anyMatch(e -> e.getKey().name().equals("field")); + assertTrue("Expected hll field captured with sketch bytes", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValueSkipped() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "hll").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatInvalidSketchThrows() throws IOException { + DocumentMapper mapper
= createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "hll").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + byte[] invalidBytes = new byte[] { 0, 1, 2, 3 }; + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field("field", invalidBytes)), docInput)); + assertThat(e.getMessage(), containsString("field [field]")); + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/IpFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/IpFieldMapperTests.java index 54399a31e4456..ba101d7d2b25c 100644 --- a/server/src/test/java/org/opensearch/index/mapper/IpFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/IpFieldMapperTests.java @@ -46,7 +46,10 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.network.InetAddresses; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.termvectors.TermVectorsService; @@ -317,4 +320,83 @@ private Document createDocument(String value, boolean hasDocValues) { } return doc; } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatIpValue() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject(FIELD_NAME).field("type", "ip").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field(FIELD_NAME, "192.168.1.1")), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(FIELD_NAME)); + assertTrue("Expected ip field to be captured", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatIpNullSkipped() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject(FIELD_NAME).field("type", "ip").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField(FIELD_NAME)), docInput); + + boolean found = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(FIELD_NAME)); + assertFalse("Expected no ip field to be captured for null value", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws Exception { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: ip value + assertIpLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject(FIELD_NAME).field("type", "ip").endObject()), + b -> b.field(FIELD_NAME, "192.168.1.1"), + FIELD_NAME, + true + ); + + // Scenario 2: null value — no field produced + assertIpLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> 
b.startObject(FIELD_NAME).field("type", "ip").endObject()), + b -> b.nullField(FIELD_NAME), + FIELD_NAME, + false + ); + } + + private void assertIpLuceneAndPluggablePathsEquivalent( + Settings pluggableSettings, + XContentBuilder mappingBuilder, + CheckedConsumer<XContentBuilder, IOException> sourceBuilder, + String fieldName, + boolean expectField + ) throws IOException { + // Lucene path + DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder); + ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder)); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName); + + // Pluggable path + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(sourceBuilder), docInput); + + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName)); + + if (!expectField) { + assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length); + assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField); + } else { + assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0); + assertTrue("Pluggable path should capture field '" + fieldName + "'", pluggableHasField); + } + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldMapperTests.java index 3a623f46101de..993a8af11075f 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldMapperTests.java @@ -51,6 +51,9 @@ import org.apache.lucene.tests.analysis.MockLowerCaseFilter; import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; @@ -582,4 +585,232 @@ private Document createDocument(KeywordFieldMapper mapper, String value, boolean } return doc; } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatDefaultKeyword() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "test_value")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("test_value")); + assertTrue("Expected keyword field captured with value 'test_value'", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValueSkipped() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type",
"keyword").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValueConfigured() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("null_value", "default_val").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("default_val")); + assertTrue("Expected keyword field captured with null_value 'default_val'", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatIgnoreAbove() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("ignore_above", 5).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "opensearch")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field when value exceeds ignore_above", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatIgnoreAboveWithinLimit() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("ignore_above", 5).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "elk")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("elk")); + assertTrue("Expected keyword field captured with value 'elk' within ignore_above limit", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatWithNormalizer() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("normalizer", "lowercase").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "AbC")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("abc")); + 
assertTrue("Expected keyword field captured with normalized value 'abc'", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatWithExternalValue() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + DocumentMapper mapper = createDocumentMapper(pluggableSettings, mapping(b -> { + b.startObject("text_field"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("kw").field("type", "keyword").endObject(); + b.endObject(); + b.endObject(); + })); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("text_field", "external_keyword")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("text_field.kw") && e.getValue().equals("external_keyword")); + assertTrue("Expected keyword sub-field captured with external value 'external_keyword'", found); + } + + public void testDefaultsDoNotUseDocumentInput() throws Exception { + XContentBuilder mapping = fieldMapping(this::minimalMapping); + DocumentMapper mapper = createDocumentMapper(mapping); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("1234"), fields[0].binaryValue()); + assertEquals(new BytesRef("1234"), fields[1].binaryValue()); + } + + /** + * Cross-path equivalence test: verifies that the pluggable DocumentInput path + * captures the same field values as the Lucene Document path for all common + * keyword scenarios (default, null_value, ignore_above, normalizer, multi-field). 
+ */ + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: default keyword + assertLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").endObject()), + b -> b.field("field", "1234"), + "field", + "1234" + ); + + // Scenario 2: null value — no field produced + assertLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").endObject()), + b -> b.nullField("field"), + "field", + null + ); + + // Scenario 3: null_value configured — substitution kicks in + assertLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("null_value", "uri").endObject()), + b -> b.nullField("field"), + "field", + "uri" + ); + + // Scenario 4: ignore_above — value exceeds limit, no field produced + assertLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("ignore_above", 5).endObject()), + b -> b.field("field", "opensearch"), + "field", + null + ); + + // Scenario 5: ignore_above — value within limit + assertLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("ignore_above", 5).endObject()), + b -> b.field("field", "elk"), + "field", + "elk" + ); + + // Scenario 6: normalizer + assertLuceneAndPluggablePathsEquivalent( + pluggableSettings, + mapping(b -> b.startObject("field").field("type", "keyword").field("normalizer", "lowercase").endObject()), + b -> b.field("field", "AbC"), + "field", + "abc" + ); + } + + /** + * Parses the same source through both the Lucene path and the pluggable DocumentInput path, + * then asserts they agree on the produced field value (or absence thereof). 
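+     * The Lucene baseline is parsed with default index settings, while the pluggable run
+     * re-parses the identical source with "index.pluggable.dataformat.enabled" set.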
+     *
+     * @param expectedValue the expected value, or null if no field should be produced
+     */
+    private void assertLuceneAndPluggablePathsEquivalent(
+        Settings pluggableSettings,
+        XContentBuilder mappingBuilder,
+        CheckedConsumer<XContentBuilder, IOException> sourceBuilder,
+        String fieldName,
+        String expectedValue
+    ) throws IOException {
+        // Lucene path (default, no pluggable setting)
+        DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder);
+        ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder));
+        IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName);
+
+        // Pluggable path
+        DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder);
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        pluggableMapper.parse(source(sourceBuilder), docInput);
+
+        if (expectedValue == null) {
+            // Both paths should produce no field
+            assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length);
+            boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName));
+            assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField);
+        } else {
+            // Lucene path should have produced the field with the expected value
+            assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0);
+            assertEquals(new BytesRef(expectedValue), luceneFields[0].binaryValue());
+
+            // Pluggable path should capture the same value
+            boolean pluggableFound = docInput.getCapturedFields()
+                .stream()
+                .anyMatch(e -> e.getKey().name().equals(fieldName) && e.getValue().equals(expectedValue));
+            assertTrue("Pluggable path should capture field '" + fieldName + "' with value '" + expectedValue + "'", pluggableFound);
+        }
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/MapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MapperTests.java
index 2b7d83e2ec04b..05327643dc86f 100644
--- a/server/src/test/java/org/opensearch/index/mapper/MapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/MapperTests.java
@@ -35,6 +35,7 @@
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.test.OpenSearchTestCase;
 
 public class MapperTests extends OpenSearchTestCase {
@@ -52,4 +53,18 @@
         expectThrows(NullPointerException.class, () -> new Mapper.BuilderContext(null, new ContentPath(1)));
     }
 
+    public void testIsPluggableDataFormatEnabledReturnsFalseByDefault() {
+        Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
+        assertFalse(Mapper.isPluggableDataFormatEnabled(settings));
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testIsPluggableDataFormatEnabledReturnsTrueWhenFlagEnabled() {
+        Settings settings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put("index.pluggable.dataformat.enabled", true)
+            .build();
+        assertTrue(Mapper.isPluggableDataFormatEnabled(settings));
+    }
+
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldMapperTests.java
index 7f2efa0ba7fdb..b3fc834501f05 100644
--- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldMapperTests.java
@@ -49,7 +49,10 @@
 import org.apache.lucene.sandbox.document.HalfFloatPoint;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.NumericUtils;
+import org.opensearch.common.CheckedConsumer;
 import org.opensearch.common.Numbers;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.core.common.bytes.BytesArray;
 import org.opensearch.core.xcontent.MediaTypeRegistry;
@@ -739,4 +742,138 @@ public void testNegativeNumberHandling() {
         decoded = IntPoint.decodeDimension(encoded, 0);
         assertEquals(-101, decoded);
     }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatIntegerValue() throws Exception {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "integer").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field(FIELD_NAME, 42)), docInput);
+
+        boolean found = docInput.getCapturedFields()
+            .stream()
+            .anyMatch(e -> e.getKey().name().equals(FIELD_NAME) && e.getValue().equals(42));
+        assertTrue("Expected integer value 42", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatLongValue() throws Exception {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "long").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field(FIELD_NAME, 123456789L)), docInput);
+
+        boolean found = docInput.getCapturedFields()
+            .stream()
+            .anyMatch(e -> e.getKey().name().equals(FIELD_NAME) && e.getValue().equals(123456789L));
+        assertTrue("Expected long value 123456789", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatDoubleValue() throws Exception {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "double").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field(FIELD_NAME, 3.14)), docInput);
+
+        boolean found = docInput.getCapturedFields()
+            .stream()
+            .anyMatch(e -> e.getKey().name().equals(FIELD_NAME) && e.getValue().equals(3.14));
+        assertTrue("Expected double value 3.14", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatNullValueSkipped() throws Exception {
+        Settings settings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(
+            settings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "integer").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.nullField(FIELD_NAME)), docInput);
+
+        assertFalse(docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(FIELD_NAME)));
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggablePathEquivalenceWithLucenePath() throws Exception {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+
+        // Scenario 1: integer value
+        assertNumericLuceneAndPluggablePathsEquivalent(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "integer").endObject()),
+            b -> b.field(FIELD_NAME, 42),
+            FIELD_NAME,
+            42
+        );
+
+        // Scenario 2: long value
+        assertNumericLuceneAndPluggablePathsEquivalent(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "long").endObject()),
+            b -> b.field(FIELD_NAME, 123456789L),
+            FIELD_NAME,
+            123456789L
+        );
+
+        // Scenario 3: double value
+        assertNumericLuceneAndPluggablePathsEquivalent(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "double").endObject()),
+            b -> b.field(FIELD_NAME, 3.14),
+            FIELD_NAME,
+            3.14
+        );
+
+        // Scenario 4: null value — no field produced
+        assertNumericLuceneAndPluggablePathsEquivalent(
+            pluggableSettings,
+            mapping(b -> b.startObject(FIELD_NAME).field("type", "integer").endObject()),
+            b -> b.nullField(FIELD_NAME),
+            FIELD_NAME,
+            null
+        );
+    }
+
+    private void assertNumericLuceneAndPluggablePathsEquivalent(
+        Settings pluggableSettings,
+        XContentBuilder mappingBuilder,
+        CheckedConsumer<XContentBuilder, IOException> sourceBuilder,
+        String fieldName,
+        Number expectedValue
+    ) throws IOException {
+        // Lucene path
+        DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder);
+        ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder));
+        IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName);
+
+        // Pluggable path
+        DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder);
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        pluggableMapper.parse(source(sourceBuilder), docInput);
+
+        if (expectedValue == null) {
+            assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length);
+            boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName));
+            assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField);
+        } else {
+            assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0);
+            assertEquals(expectedValue.doubleValue(), luceneFields[0].numericValue().doubleValue(), 0.001d);
+
+            boolean pluggableFound = docInput.getCapturedFields()
+                .stream()
+                .anyMatch(e -> e.getKey().name().equals(fieldName) && e.getValue().equals(expectedValue));
+            assertTrue("Pluggable path should capture field '" + fieldName + "' with value '" + expectedValue + "'", pluggableFound);
+        }
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/ParametrizedMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/ParametrizedMapperTests.java
index b7c65e09bcce3..b113b773824c9 100644
--- a/server/src/test/java/org/opensearch/index/mapper/ParametrizedMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/ParametrizedMapperTests.java
@@ -209,6 +209,11 @@
         protected void parseCreateField(ParseContext context) {
 
         }
 
+        @Override
+        protected void parseCreateFieldForPluggableFormat(ParseContext context) {
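+            // Intentionally a no-op: this stub mapper indexes nothing through the
+            // pluggable path, mirroring the empty parseCreateField above.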
+        }
+
         @Override
         protected String contentType() {
             return "test_mapper";
diff --git a/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java
index 40325d7f5b3fa..e4a2806f3fed5 100644
--- a/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java
@@ -37,6 +37,7 @@
 import org.apache.lucene.index.IndexableField;
 import org.opensearch.common.CheckedConsumer;
 import org.opensearch.common.network.InetAddresses;
+import org.opensearch.common.settings.Settings;
 import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.core.xcontent.ToXContent;
@@ -45,7 +46,9 @@
 import java.io.IOException;
 import java.net.InetAddress;
+import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.Set;
 
 import static org.opensearch.index.query.RangeQueryBuilder.GTE_FIELD;
@@ -454,4 +457,49 @@ public void testUpdatesWithSameMappings() throws Exception {
             mapper.merge(mapping, MergeReason.MAPPING_UPDATE);
         }
     }
+
+    private Settings pluggableSettings() {
+        return Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatLongRange() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings(),
+            mapping(b -> b.startObject("field").field("type", "long_range").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.startObject("field").field("gte", 5).field("lte", 10).endObject()), docInput);
+
+        List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields();
+        boolean found = captured.stream().anyMatch(e -> e.getKey().name().equals("field"));
+        assertTrue("Expected range field captured", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatNullValueSkipped() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings(),
+            mapping(b -> b.startObject("field").field("type", "long_range").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.nullField("field")), docInput);
+
+        boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field"));
+        assertFalse("Expected no captured field for null value", hasField);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatIpRange() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings(),
+            mapping(b -> b.startObject("field").field("type", "ip_range").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field("field", "192.168.1.0/24")), docInput);
+
+        List<Map.Entry<MappedFieldType, Object>> captured = docInput.getCapturedFields();
+        boolean found = captured.stream().anyMatch(e -> e.getKey().name().equals("field"));
+        assertTrue("Expected ip_range field captured from CIDR", found);
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/RoutingFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/RoutingFieldMapperTests.java
index 561a35efc6d18..ee57dc514a526 100644
--- a/server/src/test/java/org/opensearch/index/mapper/RoutingFieldMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/RoutingFieldMapperTests.java
@@ -33,6 +33,7 @@
 package org.opensearch.index.mapper;
 
 import org.opensearch.common.compress.CompressedXContent;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.core.common.bytes.BytesReference;
 import org.opensearch.core.xcontent.MediaTypeRegistry;
@@ -87,4 +88,18 @@ public void testIncludeInObjectNotAllowed() throws Exception {
             );
         }
     }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatMetadataFieldThrows() throws Exception {
+        String mapping = XContentFactory.jsonBuilder().startObject().endObject().toString();
+        DocumentMapper docMapper = createIndex("test_pluggable").mapperService()
+            .merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+        RoutingFieldMapper routingMapper = docMapper.metadataMapper(RoutingFieldMapper.class);
+        assertNotNull(routingMapper);
+        MapperParsingException ex = expectThrows(
+            MapperParsingException.class,
+            () -> routingMapper.parseCreateFieldForPluggableFormat(null)
+        );
+        assertThat(ex.getMessage(), containsString("metadata field and cannot be added inside a document"));
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/SemanticVersionFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/SemanticVersionFieldMapperTests.java
index 35bc16c512bc6..5864d5c181fa8 100644
--- a/server/src/test/java/org/opensearch/index/mapper/SemanticVersionFieldMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/SemanticVersionFieldMapperTests.java
@@ -29,7 +29,9 @@
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 import org.opensearch.common.geo.ShapeRelation;
+import org.opensearch.common.settings.Settings;
 import org.opensearch.common.unit.Fuzziness;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.core.xcontent.ToXContent;
 import org.opensearch.core.xcontent.XContentBuilder;
@@ -851,4 +853,36 @@ public void testFieldTypeErrorCases() {
         );
         assertThat(fieldDataException.getMessage(), containsString("does not have doc_values enabled"));
     }
+
+    private Settings pluggableSettings() {
+        return Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatSemanticVersion() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings(),
+            mapping(b -> b.startObject("field").field("type", "version").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field("field", "1.2.3")), docInput);
+
+        boolean found = docInput.getCapturedFields()
+            .stream()
+            .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("1.2.3"));
+        assertTrue("Expected version field captured with value '1.2.3'", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatNullValueSkipped() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings(),
+            mapping(b -> b.startObject("field").field("type", "version").endObject())
+        );
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.nullField("field")), docInput);
+
+        boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field"));
+        assertFalse("Expected no captured field for null value", hasField);
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
index 96b5a133f5479..493be07c0122a 100644
--- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
@@ -13,6 +13,7 @@
 import org.opensearch.common.Rounding;
 import org.opensearch.common.settings.ClusterSettings;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.core.common.unit.ByteSizeUnit;
 import org.opensearch.core.common.unit.ByteSizeValue;
 import org.opensearch.core.xcontent.XContentBuilder;
@@ -1496,4 +1497,16 @@ protected void writeFieldValue(XContentBuilder builder) throws IOException {}
     protected void registerParameters(ParameterChecker checker) throws IOException {
 
     }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatStarTreeThrows() throws IOException {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(pluggableSettings, getExpandedMappingWithJustAvg("status", "size"));
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        MapperParsingException ex = expectThrows(
+            MapperParsingException.class,
+            () -> mapper.parse(source(b -> b.field("startree", "some_value")), docInput)
+        );
+        assertThat(ex.getCause().getMessage(), containsString("star tree field and cannot be added inside a document"));
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java
index 8cc1c281d67e9..b0a76b0599a07 100644
--- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java
@@ -72,8 +72,10 @@
 import org.apache.lucene.util.BytesRef;
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.common.CheckedConsumer;
 import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.core.common.Strings;
 import org.opensearch.core.xcontent.MediaTypeRegistry;
@@ -105,6 +107,7 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import static java.util.Collections.emptyMap;
@@ -1173,4 +1176,145 @@ private Document createDocument(String name, String value, boolean forKeyword, b
         }
         return doc;
     }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatTextValue() throws IOException {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(b -> b.field("type", "text")));
+
+        CapturingDocumentInput capturingDocInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field("field", "hello world")), capturingDocInput);
+
+        List<Map.Entry<MappedFieldType, Object>> captured = capturingDocInput.getCapturedFields();
+        assertTrue(captured.stream().anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("hello world")));
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatTextNullSkipped() throws IOException {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(pluggableSettings, fieldMapping(b -> b.field("type", "text")));
+
+        CapturingDocumentInput capturingDocInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.nullField("field")), capturingDocInput);
+
+        List<Map.Entry<MappedFieldType, Object>> captured = capturingDocInput.getCapturedFields();
+        assertTrue(captured.stream().noneMatch(e -> e.getKey().name().equals("field")));
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatTextWithExternalValue() throws IOException {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(pluggableSettings, mapping(b -> {
+            b.startObject("text_field");
+            b.field("type", "text");
+            b.startObject("fields");
+            b.startObject("sub").field("type", "text").endObject();
+            b.endObject();
+            b.endObject();
+        }));
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field("text_field", "external_text")), docInput);
+
+        boolean found = docInput.getCapturedFields()
+            .stream()
+            .anyMatch(e -> e.getKey().name().equals("text_field.sub") && e.getValue().equals("external_text"));
+        assertTrue("Expected text sub-field captured with external value", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatPhraseFieldMapperThrows() throws IOException {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings,
+            fieldMapping(b -> b.field("type", "text").field("index_phrases", true))
+        );
+        TextFieldMapper textMapper = (TextFieldMapper) mapper.mappers().getMapper("field");
+        Mapper phraseMapper = null;
+        for (Mapper m : textMapper) {
+            if (m.name().endsWith("._index_phrase")) {
+                phraseMapper = m;
+                break;
+            }
+        }
+        assertNotNull("Expected phrase sub-mapper", phraseMapper);
+        assertTrue(phraseMapper instanceof FieldMapper);
+        FieldMapper phraseFieldMapper = (FieldMapper) phraseMapper;
+        expectThrows(UnsupportedOperationException.class, () -> phraseFieldMapper.parseCreateFieldForPluggableFormat(null));
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatPrefixFieldMapperThrows() throws IOException {
+        Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build();
+        DocumentMapper mapper = createDocumentMapper(
+            pluggableSettings,
+            fieldMapping(b -> b.field("type", "text").field("index_prefixes", new java.util.HashMap<>()))
+        );
+        TextFieldMapper textMapper = (TextFieldMapper) mapper.mappers().getMapper("field");
+        Mapper prefixMapper = null;
+        for (Mapper m : textMapper) {
+            if (m.name().endsWith("._index_prefix")) {
+                prefixMapper = m;
+                break;
+            }
+        }
sub-mapper", prefixMapper); + assertTrue(prefixMapper instanceof FieldMapper); + FieldMapper prefixFieldMapper = (FieldMapper) prefixMapper; + expectThrows(UnsupportedOperationException.class, () -> prefixFieldMapper.parseCreateFieldForPluggableFormat(null)); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggablePathEquivalenceWithLucenePath() throws IOException { + Settings pluggableSettings = Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + + // Scenario 1: default text value + assertTextLuceneAndPluggablePathsEquivalent( + pluggableSettings, + fieldMapping(b -> b.field("type", "text")), + b -> b.field("field", "hello world"), + "field", + "hello world" + ); + + // Scenario 2: null value — no field produced + assertTextLuceneAndPluggablePathsEquivalent( + pluggableSettings, + fieldMapping(b -> b.field("type", "text")), + b -> b.nullField("field"), + "field", + null + ); + } + + private void assertTextLuceneAndPluggablePathsEquivalent( + Settings pluggableSettings, + XContentBuilder mappingBuilder, + CheckedConsumer sourceBuilder, + String fieldName, + String expectedValue + ) throws IOException { + // Lucene path + DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder); + ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder)); + IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName); + + // Pluggable path + DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + pluggableMapper.parse(source(sourceBuilder), docInput); + + if (expectedValue == null) { + assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length); + boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName)); + assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField); + } else { + assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0); + assertEquals(expectedValue, luceneFields[0].stringValue()); + + boolean pluggableFound = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals(fieldName) && e.getValue().equals(expectedValue)); + assertTrue("Pluggable path should capture field '" + fieldName + "' with value '" + expectedValue + "'", pluggableFound); + } + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/WildcardFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/WildcardFieldMapperTests.java index 3e0c88de46c5f..29da0f00ac2a9 100644 --- a/server/src/test/java/org/opensearch/index/mapper/WildcardFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/WildcardFieldMapperTests.java @@ -28,7 +28,9 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; @@ -396,4 +398,179 @@ private Document createDocument(WildcardFieldMapper mapper, String value) throws doc.add(new SortedSetDocValuesField(FIELD_NAME, binaryValue)); return doc; } + + private Settings 
pluggableSettings() { + return Settings.builder().put(getIndexSettings()).put("index.pluggable.dataformat.enabled", true).build(); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatDefaultWildcard() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "wildcard").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "test_value")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("test_value")); + assertTrue("Expected wildcard field captured with value 'test_value'", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValueSkipped() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "wildcard").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field for null value", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatNullValueConfigured() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "wildcard").field("null_value", "default_val").endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.nullField("field")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("default_val")); + assertTrue("Expected wildcard field captured with null_value 'default_val'", found); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatIgnoreAbove() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "wildcard").field("ignore_above", 5).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "opensearch")), docInput); + + boolean hasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals("field")); + assertFalse("Expected no captured field when value exceeds ignore_above", hasField); + } + + @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) + public void testPluggableDataFormatIgnoreAboveWithinLimit() throws IOException { + DocumentMapper mapper = createDocumentMapper( + pluggableSettings(), + mapping(b -> b.startObject("field").field("type", "wildcard").field("ignore_above", 10).endObject()) + ); + CapturingDocumentInput docInput = new CapturingDocumentInput(); + mapper.parse(source(b -> b.field("field", "elk")), docInput); + + boolean found = docInput.getCapturedFields() + .stream() + .anyMatch(e -> e.getKey().name().equals("field") && e.getValue().equals("elk")); + assertTrue("Expected wildcard field captured with value 'elk' within ignore_above limit", found); + } + + 
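+    /**
+     * Multi-fields are fed through the parent field's external value, so the captured
+     * key is the dotted sub-field name ("text_field.wc") rather than the parent name.
+     */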
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggableDataFormatWithExternalValue() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(pluggableSettings(), mapping(b -> {
+            b.startObject("text_field");
+            b.field("type", "text");
+            b.startObject("fields");
+            b.startObject("wc").field("type", "wildcard").endObject();
+            b.endObject();
+            b.endObject();
+        }));
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        mapper.parse(source(b -> b.field("text_field", "external_wildcard")), docInput);
+
+        boolean found = docInput.getCapturedFields()
+            .stream()
+            .anyMatch(e -> e.getKey().name().equals("text_field.wc") && e.getValue().equals("external_wildcard"));
+        assertTrue("Expected wildcard sub-field captured with external value", found);
+    }
+
+    @LockFeatureFlag(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG)
+    public void testPluggablePathEquivalenceWithLucenePath() throws IOException {
+        Settings pluggable = pluggableSettings();
+
+        // Scenario 1: default wildcard value
+        assertWildcardLuceneAndPluggablePathsEquivalent(
+            pluggable,
+            mapping(b -> b.startObject("field").field("type", "wildcard").endObject()),
+            b -> b.field("field", "test_value"),
+            "field",
+            "test_value"
+        );
+
+        // Scenario 2: null value — no field produced
+        assertWildcardLuceneAndPluggablePathsEquivalent(
+            pluggable,
+            mapping(b -> b.startObject("field").field("type", "wildcard").endObject()),
+            b -> b.nullField("field"),
+            "field",
+            null
+        );
+
+        // Scenario 3: null_value configured — substitution kicks in
+        assertWildcardLuceneAndPluggablePathsEquivalent(
+            pluggable,
+            mapping(b -> b.startObject("field").field("type", "wildcard").field("null_value", "default_val").endObject()),
+            b -> b.nullField("field"),
+            "field",
+            "default_val"
+        );
+
+        // Scenario 4: ignore_above — value exceeds limit, no field produced
+        assertWildcardLuceneAndPluggablePathsEquivalent(
+            pluggable,
+            mapping(b -> b.startObject("field").field("type", "wildcard").field("ignore_above", 5).endObject()),
+            b -> b.field("field", "opensearch"),
+            "field",
+            null
+        );
+
+        // Scenario 5: ignore_above — value within limit
+        assertWildcardLuceneAndPluggablePathsEquivalent(
+            pluggable,
+            mapping(b -> b.startObject("field").field("type", "wildcard").field("ignore_above", 10).endObject()),
+            b -> b.field("field", "elk"),
+            "field",
+            "elk"
+        );
+    }
+
+    private void assertWildcardLuceneAndPluggablePathsEquivalent(
+        Settings pluggableSettings,
+        XContentBuilder mappingBuilder,
+        CheckedConsumer<XContentBuilder, IOException> sourceBuilder,
+        String fieldName,
+        String expectedValue
+    ) throws IOException {
+        // Lucene path
+        DocumentMapper luceneMapper = createDocumentMapper(mappingBuilder);
+        ParsedDocument luceneDoc = luceneMapper.parse(source(sourceBuilder));
+        IndexableField[] luceneFields = luceneDoc.rootDoc().getFields(fieldName);
+
+        // Pluggable path
+        DocumentMapper pluggableMapper = createDocumentMapper(pluggableSettings, mappingBuilder);
+        CapturingDocumentInput docInput = new CapturingDocumentInput();
+        pluggableMapper.parse(source(sourceBuilder), docInput);
+
+        if (expectedValue == null) {
+            assertEquals("Lucene path should produce no field for '" + fieldName + "'", 0, luceneFields.length);
+            boolean pluggableHasField = docInput.getCapturedFields().stream().anyMatch(e -> e.getKey().name().equals(fieldName));
+            assertFalse("Pluggable path should produce no field for '" + fieldName + "'", pluggableHasField);
+        } else {
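+            // Wildcard fields store an encoded form in Lucene, so only presence is asserted
+            // on the Lucene side; the raw string is compared on the pluggable side below.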
+            assertTrue("Lucene path should produce field '" + fieldName + "'", luceneFields.length > 0);
+
+            boolean pluggableFound = docInput.getCapturedFields()
+                .stream()
+                .anyMatch(e -> e.getKey().name().equals(fieldName) && e.getValue().equals(expectedValue));
+            assertTrue("Pluggable path should capture field '" + fieldName + "' with value '" + expectedValue + "'", pluggableFound);
+        }
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java
index 5c1eb632f63bc..e06a40847c60e 100644
--- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java
+++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java
@@ -5322,4 +5322,52 @@ public void testPeriodicFlushTaskExecutesFlush() throws Exception {
         closeShards(primary);
     }
+
+    public void testCacheWrapperReader() throws IOException {
+        Settings settings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexSettings.INDEX_PERIODIC_FLUSH_INTERVAL_SETTING.getKey(), "1s")
+            .build();
+
+        IndexMetadata metadata = IndexMetadata.builder("test")
+            .putMapping("{ \"properties\": { \"foo\": { \"type\": \"text\"}}}")
+            .settings(settings)
+            .primaryTerm(0, 1)
+            .build();
+
+        CheckedFunction<DirectoryReader, DirectoryReader, IOException> wrapper = reader -> reader;
+
+        IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, wrapper);
+        recoverShardFromStore(primary);
+        indexDoc(primary, "_doc", "0", "{\"foo\" : \"bar\"}");
+        primary.flush(new FlushRequest());
+
+        try (
+            Engine.SearcherSupplier searcherSupplier = primary.acquireSearcherSupplier();
+            Engine.Searcher searcher = searcherSupplier.acquireSearcher("foo")
+        ) {
+            DirectoryReader directoryReader = searcher.getDirectoryReader();
+            Engine.Searcher wrap = IndexShard.wrapSearcher(searcher, wrapper, primary.nonClosingReaderWrapperSupplier());
+            wrap.close();
+            assertEquals(1, primary.nonClosingReaderWrapperCache().size());
+            DirectoryReader nonClosingReaderWrapper = primary.nonClosingReaderWrapperCache().get(directoryReader);
+            assertNotNull(nonClosingReaderWrapper);
+
+            // use the cache
+            wrap = IndexShard.wrapSearcher(searcher, wrapper, primary.nonClosingReaderWrapperSupplier());
+            wrap.close();
+            assertEquals(1, primary.nonClosingReaderWrapperCache().size());
+            DirectoryReader newNonClosingReaderWrapper = primary.nonClosingReaderWrapperCache().get(directoryReader);
+            assertEquals(nonClosingReaderWrapper, newNonClosingReaderWrapper);
+
+            // bypass the cache: the wrapped reader must not be the cached instance
+            wrap = IndexShard.wrapSearcher(searcher, wrapper, null);
+            assertNotEquals(wrap.getDirectoryReader(), newNonClosingReaderWrapper);
+            wrap.close();
+        }
+        closeShards(primary);
+        assertTrue(primary.nonClosingReaderWrapperCache().isEmpty());
+    }
 }
diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/MultiTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/MultiTermsAggregatorTests.java
index a587d30c04950..e59bfc55b866c 100644
--- a/server/src/test/java/org/opensearch/search/aggregations/startree/MultiTermsAggregatorTests.java
+++ b/server/src/test/java/org/opensearch/search/aggregations/startree/MultiTermsAggregatorTests.java
@@ -94,6 +94,7 @@ public void testMultiTermsWithStarTree() throws IOException {
         Directory directory = newDirectory();
         IndexWriterConfig conf = newIndexWriterConfig(null);
         conf.setCodec(getCodec());
+        conf.setMergePolicy(newLogMergePolicy());
         RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
 
         // Index documents with values for our dimensions and metrics
diff --git a/settings.gradle b/settings.gradle
index 4b77ce8436d9d..c18fcaa837716 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -126,7 +126,10 @@ project(':example-plugins').projectDir = new File(rootProject.projectDir, 'plugi
 addSubProjects('', new File(rootProject.projectDir, 'libs'))
 addSubProjects('', new File(rootProject.projectDir, 'modules'))
 addSubProjects('', new File(rootProject.projectDir, 'plugins'))
-addSubProjects('', new File(rootProject.projectDir, 'sandbox'))
+// Sandbox modules require JDK 25+. Include only when explicitly enabled via -Dsandbox.enabled=true.
+if (System.getProperty('sandbox.enabled', 'false') == 'true') {
+  addSubProjects('', new File(rootProject.projectDir, 'sandbox'))
+}
 addSubProjects('', new File(rootProject.projectDir, 'qa'))
 addSubProjects('test', new File(rootProject.projectDir, 'test/external-modules'))
diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java
index d7c572c0cf9a9..5a61a0b9e6be0 100644
--- a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java
@@ -53,6 +53,7 @@
 import org.opensearch.index.analysis.AnalyzerScope;
 import org.opensearch.index.analysis.IndexAnalyzers;
 import org.opensearch.index.analysis.NamedAnalyzer;
+import org.opensearch.index.engine.dataformat.DocumentInput;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.index.similarity.SimilarityService;
 import org.opensearch.indices.IndicesModule;
@@ -66,8 +67,11 @@
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.List;
+import java.util.Map;
 
 import static java.util.Collections.emptyList;
 import static java.util.Collections.emptyMap;
@@ -124,6 +128,14 @@ protected final DocumentMapper createDocumentMapper(String type, String mappings
         return mapperService.documentMapper();
     }
 
+    /**
+     * Create a {@link DocumentMapper} with custom index settings.
+     * Useful for tests that need specific settings like pluggable dataformat.
+     */
+    protected final DocumentMapper createDocumentMapper(Settings settings, XContentBuilder mapping) throws IOException {
+        return createMapperService(settings, mapping).documentMapper();
+    }
+
     protected MapperService createMapperService(XContentBuilder mappings) throws IOException {
         return createMapperService(Version.CURRENT, mappings);
     }
@@ -169,6 +181,42 @@ protected final MapperService createMapperService(Version version, XContentBuild
         return mapperService;
     }
 
+    /**
+     * Create a {@link MapperService} with custom index settings.
+     * Useful for tests that need specific settings like pluggable dataformat.
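+     * <p>A sketch of typical usage (hypothetical mapping, mirroring the pluggable dataformat tests):
+     * <pre>{@code
+     * Settings s = Settings.builder().put("index.pluggable.dataformat.enabled", true).build();
+     * MapperService ms = createMapperService(s, fieldMapping(b -> b.field("type", "keyword")));
+     * }</pre>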
+     */
+    protected final MapperService createMapperService(Settings settings, XContentBuilder mapping) throws IOException {
+        IndexMetadata meta = IndexMetadata.builder("index")
+            .settings(Settings.builder().put("index.version.created", Version.CURRENT).put(settings))
+            .numberOfReplicas(0)
+            .numberOfShards(1)
+            .build();
+        IndexSettings indexSettings = new IndexSettings(meta, settings);
+        MapperRegistry mapperRegistry = new IndicesModule(
+            getPlugins().stream().filter(p -> p instanceof MapperPlugin).map(p -> (MapperPlugin) p).collect(toList())
+        ).getMapperRegistry();
+        ScriptModule scriptModule = new ScriptModule(
+            Settings.EMPTY,
+            getPlugins().stream().filter(p -> p instanceof ScriptPlugin).map(p -> (ScriptPlugin) p).collect(toList())
+        );
+        ScriptService scriptService = new ScriptService(settings, scriptModule.engines, scriptModule.contexts);
+        SimilarityService similarityService = new SimilarityService(indexSettings, scriptService, emptyMap());
+        MapperService mapperService = new MapperService(
+            indexSettings,
+            createIndexAnalyzers(indexSettings),
+            xContentRegistry(),
+            similarityService,
+            mapperRegistry,
+            () -> {
+                throw new UnsupportedOperationException();
+            },
+            () -> true,
+            scriptService
+        );
+        merge(mapperService, mapping);
+        return mapperService;
+    }
+
     protected final void withLuceneIndex(
         MapperService mapperService,
         CheckedConsumer<RandomIndexWriter, IOException> builder,
@@ -301,4 +349,31 @@ protected QueryShardContext createQueryShardContext(MapperService mapperService)
         when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper());
         return queryShardContext;
     }
+
+    /**
+     * A simple capturing {@link DocumentInput} that records addField calls for assertion in pluggable dataformat tests.
+     */
+    protected static class CapturingDocumentInput implements DocumentInput {
+        private final List<Map.Entry<MappedFieldType, Object>> capturedFields = new ArrayList<>();
+
+        @Override
+        public Object getFinalInput() {
+            return null;
+        }
+
+        @Override
+        public void addField(MappedFieldType fieldType, Object value) {
+            capturedFields.add(Map.entry(fieldType, value));
+        }
+
+        @Override
+        public void setRowId(String rowIdFieldName, long rowId) {}
+
+        @Override
+        public void close() {}
+
+        public List<Map.Entry<MappedFieldType, Object>> getCapturedFields() {
+            return capturedFields;
+        }
+    }
 }
diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MockFieldMapper.java b/test/framework/src/main/java/org/opensearch/index/mapper/MockFieldMapper.java
index 6e504e0f8d20a..e58467cec3983 100644
--- a/test/framework/src/main/java/org/opensearch/index/mapper/MockFieldMapper.java
+++ b/test/framework/src/main/java/org/opensearch/index/mapper/MockFieldMapper.java
@@ -91,6 +91,9 @@ protected String contentType() {
     @Override
     protected void parseCreateField(ParseContext context) {}
 
+    @Override
+    protected void parseCreateFieldForPluggableFormat(ParseContext context) {}
+
     public static class Builder extends ParametrizedFieldMapper.Builder {
         private final MappedFieldType fieldType;