From 0ff1eec9364b353762d9f14bc9da84131de1067b Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 7 May 2026 15:39:34 -0700 Subject: [PATCH 1/2] Register `LENGTH`, `REGEXP_REPLACE`, `DATE_TRUNC` in unified function spec (#5419) * feat(api): Register LENGTH, REGEXP_REPLACE, DATE_TRUNC in operator table Add FunctionSpecBuilder DSL with three construction paths: delegateTo() for existing Calcite operators, vararg() for pushdown-only UDFs, and operands() for typed functions with optional late-binding impl. Register LENGTH, REGEXP_REPLACE, and DATE_TRUNC in UnifiedFunctionSpec LIBRARY category. Contribute via CoreExtension registered in UnifiedSqlSpec.extended(). This unblocks ClickBench q28 (LENGTH), q29 (REGEXP_REPLACE), and q43 (DATE_TRUNC) at the SQL Plugin parsing/validation layer. Signed-off-by: Chen Dai * feat(api): Add pre-compilation rule for late-binding function impl Add preCompilationRules() extension point to LanguageSpec that allows extensions to transform the logical plan before in-memory execution only. The plan remains clean for external consumers (Analytics Engine). CoreExtension registers FunctionImplBindingRule which fetches impl bindings from UnifiedFunctionSpec and rewrites custom function calls into executable Calcite expressions at compilation time. DATE_TRUNC now has an impl that rewrites to FLOOR(ts TO unit), making it executable in-memory while preserving DATE_TRUNC in the logical plan for the Analytics Engine path. Signed-off-by: Chen Dai --------- Signed-off-by: Chen Dai --- .../api/compiler/UnifiedQueryCompiler.java | 5 + .../sql/api/spec/FunctionSpecBuilder.java | 186 ++++++++++++++++++ .../opensearch/sql/api/spec/LanguageSpec.java | 17 ++ .../sql/api/spec/UnifiedFunctionSpec.java | 128 +++++------- .../sql/api/spec/UnifiedSqlSpec.java | 3 +- .../sql/api/spec/core/CoreExtension.java | 28 +++ .../spec/core/LateBindingFunctionRule.java | 49 +++++ .../sql/api/UnifiedFunctionSpecTest.java | 79 ++++++++ 8 files changed, 412 insertions(+), 83 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java create mode 100644 api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java diff --git a/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java b/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java index 9caa2125427..4554b3d060d 100644 --- a/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java +++ b/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java @@ -55,6 +55,11 @@ public PreparedStatement compile(@NonNull RelNode plan) { } private PreparedStatement doCompile(RelNode plan) throws Exception { + // Apply pre-compilation rules (e.g., late-binding function impl) + for (var rule : context.getLangSpec().preCompilationRules()) { + plan = plan.accept(rule); + } + // Apply shuttle to convert LogicalTableScan to BindableTableScan final RelHomogeneousShuttle shuttle = new RelHomogeneousShuttle() { diff --git a/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java b/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java new file mode 100644 index 00000000000..e1916d33f5b --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java @@ -0,0 +1,186 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import java.util.Objects; +import java.util.function.BiFunction; +import javax.annotation.Nullable; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandMetadata; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; + +/** Fluent DSL for building {@link UnifiedFunctionSpec} instances. */ +@RequiredArgsConstructor +class FunctionSpecBuilder { + /** Function name to register. */ + private final String name; + + /** + * Wraps an existing Calcite operator, preserving its native type system and RexImpTable + * implementation for in-memory execution. + * + * @param op the Calcite operator to delegate to + * @return a builder that produces the spec on {@code build()} + */ + DelegateFunctionBuilder delegateTo(SqlOperator op) { + return new DelegateFunctionBuilder(name, op); + } + + /** + * Builds a pushdown-only UDF with relaxed type checking. The resulting function has no local + * implementation and delegates execution to the data source via pushdown. + * + * @param paramNames required parameter names for signature display + * @return a builder that produces the spec on {@code build()} + */ + CatalogFunctionBuilder vararg(String... paramNames) { + return new CatalogFunctionBuilder(name, List.of(paramNames)); + } + + /** + * Builds a typed SqlFunction with strict operand type checking. Optionally accepts a late-binding + * {@code impl} that rewrites the function into executable Calcite expressions at compilation + * time. + * + * @param families operand type families for validation + * @return a builder that produces the spec on {@code build()} + */ + DefaultFunctionBuilder operands(SqlTypeFamily... families) { + return new DefaultFunctionBuilder(name, families); + } + + @RequiredArgsConstructor + static class DefaultFunctionBuilder { + private final String name; + private final SqlTypeFamily[] operandFamilies; + private SqlReturnTypeInference returnType; + private SqlFunctionCategory category = SqlFunctionCategory.USER_DEFINED_FUNCTION; + private @Nullable BiFunction impl; + + DefaultFunctionBuilder returns(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + DefaultFunctionBuilder category(SqlFunctionCategory cat) { + this.category = cat; + return this; + } + + /** + * Defines how this function executes by rewriting to existing Calcite operators. Applied only + * at compilation time (late binding) — the logical plan preserves the original function call. + * + * @param impl rewrite function that converts this call into executable RexNodes + * @return this builder + */ + DefaultFunctionBuilder impl(BiFunction impl) { + this.impl = impl; + return this; + } + + UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returns() is required"); + SqlFunction op = + new SqlFunction( + name.toUpperCase(), + SqlKind.OTHER_FUNCTION, + returnType, + null, + OperandTypes.family(operandFamilies), + category); + return new UnifiedFunctionSpec(name.toLowerCase(), op, impl); + } + } + + @RequiredArgsConstructor + static class DelegateFunctionBuilder { + private final String name; + private final SqlOperator operator; + + UnifiedFunctionSpec build() { + return new UnifiedFunctionSpec(name.toLowerCase(), operator, null); + } + } + + @RequiredArgsConstructor + static class CatalogFunctionBuilder { + private final String name; + private final List paramNames; + private SqlReturnTypeInference returnType; + + CatalogFunctionBuilder returnType(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returnType is required"); + return new UnifiedFunctionSpec( + name, + new SqlUserDefinedFunction( + new SqlIdentifier(name, SqlParserPos.ZERO), + SqlKind.OTHER_FUNCTION, + returnType, + InferTypes.ANY_NULLABLE, + new VariadicOperandMetadata(paramNames), + List::of), // Pushdown-only: no local implementation + null); + } + } + + /** + * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code + * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts + * any operand types and delegates validation to pushdown. + */ + record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { + + @Override + public List paramNames() { + return paramNames; + } + + @Override + public List paramTypes(RelDataTypeFactory tf) { + return List.of(); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { + return true; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(paramNames.size()); + } + + @Override + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java index e824c89f8de..4009ee13bc0 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java @@ -57,6 +57,15 @@ default List> postParseRules() { default List postAnalysisRules() { return List.of(); } + + /** + * Pre-compilation rules applied only before in-memory execution. Each rule transforms the + * logical plan (e.g., binding function implementations). Not applied when the plan is consumed + * by external engines. + */ + default List preCompilationRules() { + return List.of(); + } } /** @@ -104,4 +113,12 @@ default List> postParseRules() { default List postAnalysisRules() { return extensions().stream().flatMap(ext -> ext.postAnalysisRules().stream()).toList(); } + + /** + * All pre-compilation rules from registered extensions, flattened in registration order. Applied + * only before in-memory execution. + */ + default List preCompilationRules() { + return extensions().stream().flatMap(ext -> ext.preCompilationRules().stream()).toList(); + } } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java index f60fc61a50c..72392b7c520 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java @@ -5,34 +5,36 @@ package org.opensearch.sql.api.spec; +import static org.apache.calcite.sql.SqlFunctionCategory.TIMEDATE; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.LENGTH; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3; +import static org.apache.calcite.sql.fun.SqlStdOperatorTable.FLOOR; +import static org.apache.calcite.sql.type.ReturnTypes.ARG1_NULLABLE; import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; +import static org.apache.calcite.sql.type.SqlTypeFamily.CHARACTER; +import static org.apache.calcite.sql.type.SqlTypeFamily.DATETIME; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Optional; +import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.annotation.Nullable; import lombok.AccessLevel; +import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.RequiredArgsConstructor; import lombok.ToString; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.avatica.util.TimeUnitRange; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlOperatorTable; -import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.InferTypes; -import org.apache.calcite.sql.type.SqlOperandCountRanges; import org.apache.calcite.sql.type.SqlOperandMetadata; -import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.util.SqlOperatorTables; -import org.apache.calcite.sql.validate.SqlUserDefinedFunction; /** * Declarative registry of language-level functions for the unified query engine. Functions defined @@ -43,7 +45,7 @@ @Getter @ToString(of = "funcName") @EqualsAndHashCode(of = "funcName") -@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +@AllArgsConstructor(access = AccessLevel.PACKAGE) public final class UnifiedFunctionSpec { /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ @@ -52,6 +54,31 @@ public final class UnifiedFunctionSpec { /** Calcite operator for chaining into the framework config's operator table. */ private final SqlOperator operator; + /** Optional late-binding implementation applied only at compilation time. */ + private final @Nullable BiFunction impl; + + /** Common scalar functions beyond standard. */ + public static final Category SCALAR = + new Category( + List.of( + function("length").delegateTo(LENGTH).build(), + function("regexp_replace").delegateTo(REGEXP_REPLACE_3).build(), + function("date_trunc") + .operands(CHARACTER, DATETIME) + .returns(ARG1_NULLABLE) + .category(TIMEDATE) + .impl( + (rexBuilder, call) -> { + RexLiteral unitLiteral = (RexLiteral) call.operands.get(0); + String unit = unitLiteral.getValueAs(String.class); + RexNode datetime = call.operands.get(1); + return rexBuilder.makeCall( + FLOOR, + datetime, + rexBuilder.makeFlag(TimeUnitRange.valueOf(unit.toUpperCase()))); + }) + .build())); + /** Full-text search functions. */ public static final Category RELEVANCE = new Category( @@ -65,8 +92,8 @@ public final class UnifiedFunctionSpec { function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); /** All registered function specs, keyed by function name. */ - private static final Map ALL_SPECS = - Stream.of(RELEVANCE) + public static final Map ALL_SPECS = + Stream.of(SCALAR, RELEVANCE) .flatMap(c -> c.specs().stream()) .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); @@ -101,71 +128,8 @@ public boolean contains(UnifiedFunctionSpec spec) { } } - public static Builder function(String name) { - return new Builder(name); - } - - /** Fluent builder for function specs. */ - @RequiredArgsConstructor(access = AccessLevel.PRIVATE) - public static class Builder { - private final String funcName; - private List paramNames = List.of(); - private SqlReturnTypeInference returnType; - - public Builder vararg(String... names) { - this.paramNames = List.of(names); - return this; - } - - public Builder returnType(SqlReturnTypeInference type) { - this.returnType = type; - return this; - } - - public UnifiedFunctionSpec build() { - Objects.requireNonNull(returnType, "returnType is required"); - return new UnifiedFunctionSpec( - funcName, - new SqlUserDefinedFunction( - new SqlIdentifier(funcName, SqlParserPos.ZERO), - SqlKind.OTHER_FUNCTION, - returnType, - InferTypes.ANY_NULLABLE, - new VariadicOperandMetadata(paramNames), - List::of)); // Pushdown-only: no local implementation - } - } - - /** - * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code - * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts - * any operand types and delegates validation to pushdown. - */ - private record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { - - @Override - public List paramNames() { - return paramNames; - } - - @Override - public List paramTypes(RelDataTypeFactory tf) { - return List.of(); - } - - @Override - public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { - return true; // Bypass: CALCITE-5366 breaks optional argument type checking - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.from(paramNames.size()); - } - - @Override - public String getAllowedSignatures(SqlOperator op, String opName) { - return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; - } + /** Entry point for the function spec builder DSL. */ + private static FunctionSpecBuilder function(String name) { + return new FunctionSpecBuilder(name); } } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java index a5433f015fa..28eeaa89abf 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java @@ -16,6 +16,7 @@ import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.sql.validate.SqlValidator; +import org.opensearch.sql.api.spec.core.CoreExtension; import org.opensearch.sql.api.spec.search.SearchExtension; /** @@ -50,7 +51,7 @@ public static UnifiedSqlSpec extended() { Lex.BIG_QUERY, SqlBabelParserImpl.FACTORY, SqlConformanceEnum.BABEL, - List.of(new SearchExtension())); + List.of(new CoreExtension(), new SearchExtension())); } @Override diff --git a/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java new file mode 100644 index 00000000000..17aa8a20bee --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java @@ -0,0 +1,28 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.core; + +import java.util.List; +import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.sql.SqlOperatorTable; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Core extension that extends the default language spec with additional functions and capabilities. + */ +public class CoreExtension implements LanguageSpec.LanguageExtension { + + @Override + public SqlOperatorTable operators() { + return UnifiedFunctionSpec.SCALAR.operatorTable(); + } + + @Override + public List preCompilationRules() { + return List.of(new LateBindingFunctionRule()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java b/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java new file mode 100644 index 00000000000..3294d21a241 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.core; + +import java.util.Map; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.stream.Collectors; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlOperator; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Binds custom function implementations at compilation time by rewriting to executable Calcite + * expressions. + */ +class LateBindingFunctionRule extends RelHomogeneousShuttle { + + /** Operator-to-impl mappings collected from all function specs. */ + private final Map> bindings = + UnifiedFunctionSpec.ALL_SPECS.values().stream() + .filter(spec -> spec.getImpl() != null) + .collect( + Collectors.toMap(UnifiedFunctionSpec::getOperator, UnifiedFunctionSpec::getImpl)); + + @Override + public RelNode visit(RelNode node) { + RelNode visited = super.visit(node); + RexBuilder rexBuilder = node.getCluster().getRexBuilder(); + return visited.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + RexCall visited = (RexCall) super.visitCall(call); + return Optional.ofNullable(bindings.get(visited.getOperator())) + .map(impl -> impl.apply(rexBuilder, visited)) + .orElse(visited); + } + }); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java new file mode 100644 index 00000000000..a16fa116b42 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java @@ -0,0 +1,79 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Timestamp; +import org.apache.calcite.rel.RelNode; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; +import org.opensearch.sql.executor.QueryType; + +/** + * Tests for scalar functions registered in {@link UnifiedFunctionSpec#SCALAR}. Verifies planning + * (function resolves correctly) and execution (produces correct results in-memory). + */ +public class UnifiedFunctionSpecTest extends UnifiedQueryTestBase { + + private UnifiedQueryCompiler compiler; + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Before + public void setUp() { + super.setUp(); + compiler = new UnifiedQueryCompiler(context); + } + + @Test + public void testLength() throws Exception { + assertEquals(5, eval("LENGTH('hello')")); + assertEquals(0, eval("LENGTH('')")); + } + + @Test + public void testRegexpReplace() throws Exception { + assertEquals("XbcXbc", eval("REGEXP_REPLACE('abcabc', 'a', 'X')")); + assertEquals("hello", eval("REGEXP_REPLACE('hello', 'xyz', 'X')")); + } + + @Test + public void testDateTrunc() throws Exception { + // Plan preserves DATE_TRUNC (late binding — not rewritten until compilation) + givenQuery( + "SELECT DATE_TRUNC('minute', TIMESTAMP '2023-01-01 12:34:56') FROM catalog.employees") + .assertPlanContains("DATE_TRUNC('minute', 2023-01-01 12:34:56)"); + + // Execution rewrites to FLOOR and produces truncated timestamp + Object result = eval("DATE_TRUNC('hour', TIMESTAMP '2023-07-15 14:30:45')"); + assertEquals(Timestamp.valueOf("2023-07-15 14:00:00"), result); + } + + @Test + public void testFunctionSpecLookup() { + assertTrue(UnifiedFunctionSpec.of("length").isPresent()); + assertTrue(UnifiedFunctionSpec.of("regexp_replace").isPresent()); + assertTrue(UnifiedFunctionSpec.of("date_trunc").isPresent()); + } + + private Object eval(String expr) throws Exception { + RelNode plan = planner.plan("SELECT " + expr + " AS v FROM (VALUES (0)) AS t(dummy)"); + try (PreparedStatement stmt = compiler.compile(plan); + ResultSet rs = stmt.executeQuery()) { + assertTrue(rs.next()); + return rs.getObject(1); + } + } +} From 899564996a695c7cc343dea91a132c75e58ee767 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 7 May 2026 11:14:50 -0700 Subject: [PATCH 2/2] feat(api): Add pre-compilation adapter for datetime UDF type bridging Add preCompilationRules to LanguageSpec.LanguageExtension and register DatetimeUdfCompilationAdapterRule in DatetimeExtension. The rule inserts CAST nodes to bridge the type mismatch between normalized standard types (DATE/TIME/TIMESTAMP as int/long) and PPL UDF implementors (which expect and produce String values): Before: LAST_DAY($2:DATE) : DATE After: CAST(LAST_DAY(CAST($2 AS VARCHAR)):VARCHAR AS DATE) Applied only in UnifiedQueryCompiler before Enumerable code generation, so the logical plan seen by other consumers (Analytics Engine, Substrait) remains clean with standard types. Signed-off-by: Chen Dai --- .../api/spec/datetime/DatetimeExtension.java | 5 + .../DatetimeUdfCompilationAdapterRule.java | 85 +++++++++++ .../spec/datetime/DatetimeExtensionTest.java | 137 ++++++++++-------- 3 files changed, 168 insertions(+), 59 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdfCompilationAdapterRule.java diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java index 944ac4a4bf1..f317e9960c2 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java @@ -25,6 +25,11 @@ public List postAnalysisRules() { return List.of(DatetimeUdtNormalizeRule.INSTANCE, DatetimeOutputCastRule.INSTANCE); } + @Override + public List preCompilationRules() { + return List.of(DatetimeUdfCompilationAdapterRule.INSTANCE); + } + /** Maps datetime UDT types to their standard Calcite equivalents. */ @Getter @RequiredArgsConstructor diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdfCompilationAdapterRule.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdfCompilationAdapterRule.java new file mode 100644 index 00000000000..daf9ef4fdfb --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdfCompilationAdapterRule.java @@ -0,0 +1,85 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import static org.opensearch.sql.api.spec.datetime.DatetimeExtension.UdtMapping.isDatetimeType; + +import java.util.ArrayList; +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; + +/** + * Adapts datetime UDF calls for Enumerable compilation. PPL UDF implementors expect String + * input/output, but after normalization the plan uses standard DATE/TIME/TIMESTAMP types + * (int/long). This rule inserts CASTs to bridge the mismatch: + * + *
+ *   Before: LAST_DAY($2:DATE) : DATE
+ *   After:  CAST(LAST_DAY(CAST($2 AS VARCHAR)):VARCHAR AS DATE)
+ * 
+ */ +@NoArgsConstructor(access = AccessLevel.PACKAGE) +class DatetimeUdfCompilationAdapterRule extends RelHomogeneousShuttle { + + static final DatetimeUdfCompilationAdapterRule INSTANCE = new DatetimeUdfCompilationAdapterRule(); + + @Override + public RelNode visit(RelNode other) { + RelNode visited = super.visit(other); + RexBuilder rexBuilder = visited.getCluster().getRexBuilder(); + RelDataTypeFactory typeFactory = rexBuilder.getTypeFactory(); + return visited.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + call = (RexCall) super.visitCall(call); + if (!(call.getOperator() instanceof SqlUserDefinedFunction)) { + return call; + } + + // Adapt operands: CAST(datetime_operand AS VARCHAR) for UDF implementor + List adapted = new ArrayList<>(call.getOperands().size()); + boolean operandsChanged = false; + for (RexNode operand : call.getOperands()) { + if (isDatetimeType(operand.getType().getSqlTypeName())) { + RelDataType varcharType = + typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.VARCHAR), + operand.getType().isNullable()); + adapted.add(rexBuilder.makeCast(varcharType, operand)); + operandsChanged = true; + } else { + adapted.add(operand); + } + } + + // Adapt result: if return type is datetime, wrap call with VARCHAR return + CAST back + if (isDatetimeType(call.getType().getSqlTypeName())) { + RelDataType declaredType = call.getType(); + RelDataType varcharType = + typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.VARCHAR), declaredType.isNullable()); + RexCall varcharCall = + call.clone(varcharType, operandsChanged ? adapted : call.getOperands()); + return rexBuilder.makeCast(declaredType, varcharCall); + } + + return operandsChanged ? call.clone(call.getType(), adapted) : call; + } + }); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java index fc089150109..6d6b2ebf8aa 100644 --- a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java +++ b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java @@ -71,114 +71,97 @@ private Table createEventsTable() { } @Test - public void testUdfResultNormalizedAndCastToVarchar() { + public void testUdfOnLiteralsNormalizedAndExecutable() throws Exception { var plan = givenQuery( """ source = catalog.events \ - | eval d = DATE(name), t = TIME(name), ts = TIMESTAMP(name) \ + | eval d = DATE('2024-01-01'), t = TIME('12:30:00'), ts = TIMESTAMP('2024-01-01 12:30:00') \ | fields d, t, ts\ """) .assertPlan( """ LogicalProject(d=[CAST($0):VARCHAR], t=[CAST($1):VARCHAR], ts=[CAST($2):VARCHAR]) - LogicalProject(d=[DATE($1)], t=[TIME($1)], ts=[TIMESTAMP($1)]) + LogicalProject(d=[DATE('2024-01-01':VARCHAR)], t=[TIME('12:30:00':VARCHAR)], ts=[TIMESTAMP('2024-01-01 12:30:00':VARCHAR)]) LogicalTableScan(table=[[catalog, events]]) """) .plan(); assertCallType(plan, "DATE", DATE); assertCallType(plan, "TIME", TIME, 9); assertCallType(plan, "TIMESTAMP", TIMESTAMP, 9); + try (PreparedStatement stmt = compiler.compile(plan)) { + ResultSet rs = stmt.executeQuery(); + verify(rs) + .expectSchema( + col("d", java.sql.Types.VARCHAR), + col("t", java.sql.Types.VARCHAR), + col("ts", java.sql.Types.VARCHAR)) + .expectData( + row("2024-01-01", "12:30:00", "2024-01-01 12:30:00"), + row("2024-01-01", "12:30:00", "2024-01-01 12:30:00")); + } } @Test - public void testNestedUdfCallsNormalized() { + public void testNestedUdfCallsExecutable() throws Exception { var plan = - givenQuery("source = catalog.events | eval d = DATEDIFF(DATE(name), DATE(name)) | fields d") - .assertPlan( + givenQuery( """ - LogicalProject(d=[DATEDIFF(DATE($1), DATE($1))]) - LogicalTableScan(table=[[catalog, events]]) + source = catalog.events \ + | eval d = DATEDIFF(DATE('2025-01-01'), DATE('2024-01-01')) \ + | fields d\ """) - .plan(); - assertCallType(plan, "DATE", DATE); - assertCallType(plan, "DATEDIFF", BIGINT); - } - - @Test - public void testDateLiteralCastToVarchar() { - var plan = - givenQuery("source = catalog.events | eval d = DATE('2024-01-01') | fields d") .assertPlan( """ - LogicalProject(d=[CAST($0):VARCHAR]) - LogicalProject(d=[DATE('2024-01-01':VARCHAR)]) - LogicalTableScan(table=[[catalog, events]]) + LogicalProject(d=[DATEDIFF(DATE('2025-01-01':VARCHAR), DATE('2024-01-01':VARCHAR))]) + LogicalTableScan(table=[[catalog, events]]) """) .plan(); assertCallType(plan, "DATE", DATE); + assertCallType(plan, "DATEDIFF", BIGINT); + try (PreparedStatement stmt = compiler.compile(plan)) { + ResultSet rs = stmt.executeQuery(); + verify(rs).expectSchema(col("d", java.sql.Types.BIGINT)).expectData(row(366L), row(366L)); + } } @Test - public void testFilterWithTimestampLiteral() { + public void testFilterWithTimestampUdf() throws Exception { var plan = givenQuery( """ - source = catalog.events | where created_at > "2024-01-01T00:00:00Z" | fields id\ + source = catalog.events \ + | where created_at < TIMESTAMP('2024-06-01 00:00:00') \ + | fields id\ """) .assertPlan( """ LogicalProject(id=[$0]) - LogicalFilter(condition=[>($4, TIMESTAMP('2024-01-01T00:00:00Z':VARCHAR))]) + LogicalFilter(condition=[<($4, TIMESTAMP('2024-06-01 00:00:00':VARCHAR))]) LogicalTableScan(table=[[catalog, events]]) """) .plan(); assertCallType(plan, "TIMESTAMP", TIMESTAMP, 9); + try (PreparedStatement stmt = compiler.compile(plan)) { + ResultSet rs = stmt.executeQuery(); + verify(rs).expectSchema(col("id", java.sql.Types.INTEGER)).expectData(row(1)); + } } @Test - public void testComparisonWithDatetimeUdf() { + public void testStandardDatetimeFieldsCastToVarchar() throws Exception { var plan = - givenQuery("source = catalog.events | where created_at < DATE(name) | fields id") + givenQuery("source = catalog.events | fields hire_date, start_time, created_at") .assertPlan( """ - LogicalProject(id=[$0]) - LogicalFilter(condition=[<($4, TIMESTAMP(DATE($1)))]) + LogicalProject(hire_date=[CAST($0):VARCHAR NOT NULL], start_time=[CAST($1):VARCHAR NOT NULL], created_at=[CAST($2):VARCHAR NOT NULL]) + LogicalProject(hire_date=[$2], start_time=[$3], created_at=[$4]) LogicalTableScan(table=[[catalog, events]]) """) .plan(); - assertCallType(plan, "DATE", DATE); - assertCallType(plan, "TIMESTAMP", TIMESTAMP, 9); - } - - @Test - public void testAllStandardDatetimeTypesCastToVarchar() { - givenQuery("source = catalog.events | fields hire_date, start_time, created_at") - .assertPlan( - """ - LogicalProject(hire_date=[CAST($0):VARCHAR NOT NULL], start_time=[CAST($1):VARCHAR NOT NULL], created_at=[CAST($2):VARCHAR NOT NULL]) - LogicalProject(hire_date=[$2], start_time=[$3], created_at=[$4]) - LogicalTableScan(table=[[catalog, events]]) - """); - } - - @Test - public void testNonDatetimeFieldsNotWrapped() { - givenQuery("source = catalog.events | fields id, name") - .assertPlan( - """ - LogicalProject(id=[$0], name=[$1]) - LogicalTableScan(table=[[catalog, events]]) - """); - } - - @Test - public void testOutputCastCanCompileAndExecute() throws Exception { - RelNode plan = - planner.plan("source = catalog.events | fields hire_date, start_time, created_at"); - try (PreparedStatement statement = compiler.compile(plan)) { - ResultSet resultSet = statement.executeQuery(); - verify(resultSet) + try (PreparedStatement stmt = compiler.compile(plan)) { + ResultSet rs = stmt.executeQuery(); + verify(rs) .expectSchema( col("hire_date", java.sql.Types.VARCHAR), col("start_time", java.sql.Types.VARCHAR), @@ -189,6 +172,42 @@ public void testOutputCastCanCompileAndExecute() throws Exception { } } + @Test + public void testNonDatetimeFieldsNotWrapped() throws Exception { + var plan = + givenQuery("source = catalog.events | fields id, name") + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + try (PreparedStatement stmt = compiler.compile(plan)) { + ResultSet rs = stmt.executeQuery(); + verify(rs) + .expectSchema(col("id", java.sql.Types.INTEGER), col("name", java.sql.Types.VARCHAR)) + .expectData(row(1, "Alice"), row(2, "Bob")); + } + } + + @Test + public void testNonDatetimeUdfUnaffected() throws Exception { + var plan = + givenQuery("source = catalog.events | eval s = CONCAT(name, ' test') | fields s") + .assertPlan( + """ + LogicalProject(s=[CONCAT($1, ' test':VARCHAR)]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + try (PreparedStatement stmt = compiler.compile(plan)) { + ResultSet rs = stmt.executeQuery(); + verify(rs) + .expectSchema(col("s", java.sql.Types.VARCHAR)) + .expectData(row("Alice test"), row("Bob test")); + } + } + private static void assertCallType(RelNode plan, String operatorName, SqlTypeName expectedType) { assertCallType(plan, operatorName, expectedType, -1); }