diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 5f58dea227e..c2783804e07 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -46,6 +46,10 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); + /** Thread-local QueryType for use by functions that lack access to CalcitePlanContext. */ + private static final ThreadLocal currentQueryType = + ThreadLocal.withInitial(() -> QueryType.PPL); + @Getter @Setter private HighlightConfig highlightConfig; @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @@ -78,6 +82,7 @@ private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType this.config = config; this.sysLimit = sysLimit; this.queryType = queryType; + currentQueryType.set(queryType); this.connection = CalciteToolsHelper.connect(config, TYPE_FACTORY); this.relBuilder = CalciteToolsHelper.create(config, TYPE_FACTORY, connection); this.rexBuilder = new ExtendedRexBuilder(relBuilder.getRexBuilder()); @@ -158,6 +163,7 @@ public static void run(Runnable action, Settings settings) { action.run(); } finally { legacyPreferredFlag.remove(); + currentQueryType.remove(); } } @@ -168,6 +174,13 @@ public static boolean isLegacyPreferred() { return legacyPreferredFlag.get(); } + /** + * @return the current QueryType from thread-local context. + */ + public static QueryType getCurrentQueryType() { + return currentQueryType.get(); + } + public void putRexLambdaRefMap(Map candidateMap) { this.rexLambdaRefMap.putAll(candidateMap); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1251f51b131..48a93085e4b 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -132,6 +132,7 @@ import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; +import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; import org.opensearch.sql.ast.tree.ML; @@ -246,7 +247,7 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { if (nameResolver.getSchemaName().equals(INFORMATION_SCHEMA_NAME)) { throw new CalciteUnsupportedException("information_schema is unsupported in Calcite"); } - context.relBuilder.scan(node.getTableQualifiedName().getParts()); + context.relBuilder.scan(List.of(nameResolver.getIdentifierName())); RelNode scan = context.relBuilder.peek(); // Eagerly push down highlight config to the scan (highlight is a scan hint, not an operator) @@ -541,6 +542,16 @@ private List expandProjectFields( .filter(addedFields::add) .forEach(field -> expandedFields.add(context.relBuilder.field(field))); } + case Alias alias -> { + String aliasName = + Strings.isNullOrEmpty(alias.getAlias()) ? alias.getName() : alias.getAlias(); + if (alias.getDelegated() instanceof AggregateFunction + && currentFields.contains(aliasName)) { + expandedFields.add(context.relBuilder.field(aliasName)); + } else { + expandedFields.add(rexVisitor.analyze(alias, context)); + } + } default -> throw new IllegalStateException( "Unexpected expression type in project list: " + expr.getClass().getSimpleName()); @@ -763,6 +774,13 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitLimit(Limit node, CalcitePlanContext context) { + visitChildren(node, context); + context.relBuilder.limit(node.getOffset(), node.getLimit()); + return context.relBuilder.peek(); + } + /** * Insert a reversed sort node after finding the original sort in the tree. This rebuilds the tree * with the reversed sort inserted right after the original sort. @@ -1621,7 +1639,9 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); - Boolean bucketNullable = (Boolean) statsArgs.get(Argument.BUCKET_NULLABLE).getValue(); + Literal bucketNullableLit = statsArgs.get(Argument.BUCKET_NULLABLE); + Boolean bucketNullable = + bucketNullableLit != null ? (Boolean) bucketNullableLit.getValue() : true; int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 1 : 0); BitSet nonNullGroupMask = new BitSet(nGroup); if (!bucketNullable) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index c2ce4a740ec..b89b831cc57 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -18,6 +18,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; import javax.annotation.Nullable; @@ -42,6 +43,7 @@ import org.apache.calcite.util.TimestampString; import org.apache.logging.log4j.util.Strings; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.AggregateFunction; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Between; @@ -83,6 +85,7 @@ import org.opensearch.sql.exception.CalciteUnsupportedException; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -90,6 +93,10 @@ public class CalciteRexNodeVisitor extends AbstractNodeVisitor { private final CalciteRelNodeVisitor planVisitor; + private static final Set PURE_WINDOW_FUNCTIONS = + Set.of( + BuiltinFunctionName.ROW_NUMBER, BuiltinFunctionName.RANK, BuiltinFunctionName.DENSE_RANK); + public RexNode analyze(UnresolvedExpression unresolved, CalcitePlanContext context) { return unresolved.accept(this, context); } @@ -338,8 +345,14 @@ public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context public RexNode visitAlias(Alias node, CalcitePlanContext context) { RexNode expr = analyze(node.getDelegated(), context); // Only OpenSearch SQL uses node.getAlias, OpenSearch PPL uses node.getName. - return context.relBuilder.alias( - expr, Strings.isEmpty(node.getAlias()) ? node.getName() : node.getAlias()); + String aliasName = Strings.isEmpty(node.getAlias()) ? node.getName() : node.getAlias(); + // For SQL queries, encode expression name and alias in field name so the response + // builder can reconstruct both (V2 compatibility: name=expr, alias=AS-alias). + if (context.queryType == QueryType.SQL && !Strings.isEmpty(node.getAlias())) { + String exprName = node.getName() != null ? node.getName() : aliasName; + aliasName = exprName + "\u0000" + node.getAlias(); + } + return context.relBuilder.alias(expr, aliasName); } @Override @@ -563,15 +576,49 @@ public RexNode visitFunction(Function node, CalcitePlanContext context) { @Override public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext context) { - Function windowFunction = (Function) node.getFunction(); - List arguments = - windowFunction.getFuncArgs().stream().map(arg -> analyze(arg, context)).toList(); + String funcName; + List arguments; + final boolean isDistinct; + if (node.getFunction() instanceof AggregateFunction aggFunc) { + funcName = aggFunc.getFuncName(); + isDistinct = Boolean.TRUE.equals(aggFunc.getDistinct()); + List argExprs = new java.util.ArrayList<>(); + if (aggFunc.getField() != null) { + argExprs.add(aggFunc.getField()); + } + argExprs.addAll(aggFunc.getArgList()); + arguments = argExprs.stream().map(arg -> analyze(arg, context)).toList(); + } else { + Function windowFunction = (Function) node.getFunction(); + funcName = windowFunction.getFuncName(); + isDistinct = false; + arguments = windowFunction.getFuncArgs().stream().map(arg -> analyze(arg, context)).toList(); + } List partitions = node.getPartitionByList().stream() .map(arg -> analyze(arg, context)) .map(this::extractRexNodeFromAlias) .toList(); - return BuiltinFunctionName.ofWindowFunction(windowFunction.getFuncName()) + List orderKeys = + node.getSortList().stream() + .map( + pair -> { + RexNode sortField = analyze(pair.getRight(), context); + if (pair.getLeft().getSortOrder() + == org.opensearch.sql.ast.tree.Sort.SortOrder.DESC) { + sortField = context.relBuilder.desc(sortField); + } + if (pair.getLeft().getNullOrder() + == org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST) { + sortField = context.relBuilder.nullsLast(sortField); + } else if (pair.getLeft().getNullOrder() + == org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST) { + sortField = context.relBuilder.nullsFirst(sortField); + } + return sortField; + }) + .toList(); + return BuiltinFunctionName.ofWindowFunction(funcName) .map( functionName -> { RexNode field = arguments.isEmpty() ? null : arguments.getFirst(); @@ -579,6 +626,18 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte (arguments.isEmpty() || arguments.size() == 1) ? Collections.emptyList() : arguments.subList(1, arguments.size()); + // Pure window functions (ROW_NUMBER, RANK, DENSE_RANK) are not registered + // in aggFunctionRegistry, so skip validation for them. + if (PURE_WINDOW_FUNCTIONS.contains(functionName)) { + return PlanUtils.makeOver( + context, + functionName, + field, + args, + partitions, + orderKeys, + node.getWindowFrame()); + } List nodes = PPLFuncImpTable.INSTANCE.validateAggFunctionSignature( functionName, field, args, context.rexBuilder); @@ -586,24 +645,24 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte ? PlanUtils.makeOver( context, functionName, + isDistinct, nodes.getFirst(), nodes.size() <= 1 ? Collections.emptyList() : nodes.subList(1, nodes.size()), partitions, - List.of(), + orderKeys, node.getWindowFrame()) : PlanUtils.makeOver( context, functionName, + isDistinct, field, args, partitions, - List.of(), + orderKeys, node.getWindowFrame()); }) .orElseThrow( - () -> - new UnsupportedOperationException( - "Unexpected window function: " + windowFunction.getFuncName())); + () -> new UnsupportedOperationException("Unexpected window function: " + funcName)); } /** extract the expression of Alias from a node */ diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 4d2dae4bd60..d828f508862 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -171,6 +171,19 @@ static RexNode makeOver( List partitions, List orderKeys, @Nullable WindowFrame windowFrame) { + return makeOver( + context, functionName, false, field, argList, partitions, orderKeys, windowFrame); + } + + static RexNode makeOver( + CalcitePlanContext context, + BuiltinFunctionName functionName, + boolean distinct, + RexNode field, + List argList, + List partitions, + List orderKeys, + @Nullable WindowFrame windowFrame) { if (windowFrame == null) { windowFrame = WindowFrame.rowsUnbounded(); } @@ -216,6 +229,22 @@ static RexNode makeOver( true, lowerBound, upperBound); + case RANK: + return withOver( + context.relBuilder.aggregateCall(SqlStdOperatorTable.RANK), + partitions, + orderKeys, + true, + lowerBound, + upperBound); + case DENSE_RANK: + return withOver( + context.relBuilder.aggregateCall(SqlStdOperatorTable.DENSE_RANK), + partitions, + orderKeys, + true, + lowerBound, + upperBound); case NTH_VALUE: return withOver( context.relBuilder.aggregateCall(SqlStdOperatorTable.NTH_VALUE, field, argList.get(0)), @@ -226,7 +255,7 @@ static RexNode makeOver( upperBound); default: return withOver( - makeAggCall(context, functionName, false, field, argList), + makeAggCall(context, functionName, distinct, field, argList), partitions, orderKeys, rows, diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index fe9d3e55dc1..cd91c6b9a6e 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -358,9 +358,10 @@ private boolean isCalciteEnabled(Settings settings) { } // TODO https://github.com/opensearch-project/sql/issues/3457 - // Calcite is not available for SQL query now. Maybe release in 3.1.0? + // TODO: PoC only — in production, SQL routes through unified query API (RestUnifiedQueryAction), + // not through this flag. Remove this change when migrating to the unified query path. private boolean shouldUseCalcite(QueryType queryType) { - return isCalciteEnabled(settings) && queryType == QueryType.PPL; + return isCalciteEnabled(settings) && (queryType == QueryType.PPL || queryType == QueryType.SQL); } private FrameworkConfig buildFrameworkConfig() { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 14f058a75d0..ddefaa3fec0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -425,6 +425,11 @@ public enum BuiltinFunctionName { .put("dc", BuiltinFunctionName.DISTINCT_COUNT_APPROX) .put("distinct_count", BuiltinFunctionName.DISTINCT_COUNT_APPROX) .put("pattern", BuiltinFunctionName.INTERNAL_PATTERN) + .put("row_number", BuiltinFunctionName.ROW_NUMBER) + .put("rank", BuiltinFunctionName.RANK) + .put("dense_rank", BuiltinFunctionName.DENSE_RANK) + .put("percentile", BuiltinFunctionName.PERCENTILE_APPROX) + .put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX) .build(); public static Optional of(String str) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 0a5b0fe0e03..3ec1000002c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -424,6 +424,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("query_string", false); public static final SqlOperator MULTI_MATCH = RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("multi_match", false); + public static final SqlOperator WILDCARD_QUERY = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("wildcard_query"); public static final SqlOperator NUMBER_TO_STRING = new NumberToStringFunction().toUDF("NUMBER_TO_STRING"); public static final SqlOperator TONUMBER = new ToNumberFunction().toUDF("TONUMBER"); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 849c60fe4eb..5cbfdd57d7c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -92,6 +92,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_TRANSLATE3; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ISNULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_BLANK; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_EMPTY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; @@ -133,9 +134,13 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_CONCAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_REMOVE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCHPHRASE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCHPHRASEQUERY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCHQUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_BOOL_PREFIX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_QUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; @@ -154,6 +159,8 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTHNAME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MSTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIMATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIMATCHQUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; @@ -178,6 +185,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.POW; import static org.opensearch.sql.expression.function.BuiltinFunctionName.POWER; import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUARTER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; @@ -254,6 +262,8 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKOFYEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WIDTH_BUCKET; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WILDCARDQUERY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WILDCARD_QUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; @@ -298,6 +308,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; @@ -506,6 +518,11 @@ static List validateFunctionArgs( coercionNodes = CoercionUtils.castArguments(rexBuilder, signature.typeChecker(), fields); } if (coercionNodes == null) { + // For SQL queries, skip strict type validation and let Calcite handle type coercion + // (e.g., AVG on TIMESTAMP is valid in SQL but not in PPL). + if (CalcitePlanContext.getCurrentQueryType() == QueryType.SQL) { + return null; + } String errorMessagePattern = argTypes.size() <= 1 ? "Aggregation function %s expects field type {%s}, but got %s" @@ -854,7 +871,7 @@ void populate() { registerOperator(SIN, SqlStdOperatorTable.SIN); registerOperator(CBRT, SqlStdOperatorTable.CBRT); - registerOperator(IFNULL, SqlStdOperatorTable.COALESCE); + registerOperator(IFNULL, PPLBuiltinOperators.ENHANCED_COALESCE); registerOperator(EARLIEST, PPLBuiltinOperators.EARLIEST); registerOperator(LATEST, PPLBuiltinOperators.LATEST); registerOperator(COALESCE, PPLBuiltinOperators.ENHANCED_COALESCE); @@ -908,6 +925,16 @@ void populate() { registerOperator(SIMPLE_QUERY_STRING, PPLBuiltinOperators.SIMPLE_QUERY_STRING); registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING); registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH); + // Legacy relevance function aliases + registerOperator(MATCH_QUERY, PPLBuiltinOperators.MATCH); + registerOperator(MATCHQUERY, PPLBuiltinOperators.MATCH); + registerOperator(MATCHPHRASE, PPLBuiltinOperators.MATCH_PHRASE); + registerOperator(MATCHPHRASEQUERY, PPLBuiltinOperators.MATCH_PHRASE); + registerOperator(MULTIMATCH, PPLBuiltinOperators.MULTI_MATCH); + registerOperator(MULTIMATCHQUERY, PPLBuiltinOperators.MULTI_MATCH); + registerOperator(QUERY, PPLBuiltinOperators.QUERY_STRING); + registerOperator(WILDCARD_QUERY, PPLBuiltinOperators.WILDCARD_QUERY); + registerOperator(WILDCARDQUERY, PPLBuiltinOperators.WILDCARD_QUERY); registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT); registerOperator(REX_EXTRACT_MULTI, PPLBuiltinOperators.REX_EXTRACT_MULTI); registerOperator(REX_OFFSET, PPLBuiltinOperators.REX_OFFSET); @@ -1147,6 +1174,8 @@ void populate() { IS_PRESENT, SqlStdOperatorTable.IS_NOT_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE)); registerOperator( IS_NULL, SqlStdOperatorTable.IS_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE)); + registerOperator( + ISNULL, SqlStdOperatorTable.IS_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE)); // Register implementation. // Note, make the implementation an individual class if too complex. @@ -1248,7 +1277,8 @@ void populate() { TYPEOF, (FunctionImp1) (builder, arg) -> - builder.makeLiteral(getLegacyTypeName(arg.getType(), QueryType.PPL)), + builder.makeLiteral( + getLegacyTypeName(arg.getType(), CalcitePlanContext.getCurrentQueryType())), null); register( NULLIF, @@ -1362,7 +1392,18 @@ void populate() { register( AVG, - (distinct, field, argList, ctx) -> ctx.relBuilder.avg(distinct, null, field), + (distinct, field, argList, ctx) -> { + // For temporal types (UDT or standard), cast to BIGINT before AVG + if (field != null + && CalcitePlanContext.getCurrentQueryType() == QueryType.SQL + && isTemporalType(field.getType())) { + RexNode castField = + ctx.rexBuilder.makeCast( + ctx.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT), field); + return ctx.relBuilder.avg(distinct, null, castField); + } + return ctx.relBuilder.avg(distinct, null, field); + }, wrapSqlOperandTypeChecker( SqlStdOperatorTable.AVG.getOperandTypeChecker(), AVG.name(), false)); @@ -1463,6 +1504,15 @@ void populate() { } } + /** Returns true if the type is a datetime UDT or standard datetime SqlTypeName. */ + private static boolean isTemporalType(RelDataType type) { + if (type instanceof AbstractExprRelDataType udt) { + ExprUDT u = udt.getUdt(); + return u == ExprUDT.EXPR_DATE || u == ExprUDT.EXPR_TIME || u == ExprUDT.EXPR_TIMESTAMP; + } + return SqlTypeFamily.DATETIME.getTypeNames().contains(type.getSqlTypeName()); + } + static List resolveTimeField(List argList, CalcitePlanContext ctx) { if (argList.isEmpty()) { // Try to find @timestamp field diff --git a/docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md b/docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md new file mode 100644 index 00000000000..eace973da9d --- /dev/null +++ b/docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md @@ -0,0 +1,153 @@ +# SQL V2 → CalciteRelNodeVisitor: Updated Gap Analysis + +**Date**: 2026-05-13 (updated) +**Previous Pass Rate**: 572 / 964 (59.3%) — 2026-05-12 +**Current Pass Rate**: 491 / 609 (80.6%) — V2 + Legacy ITs that actually run +**Branch**: sql-v2-calcite-gap-analysis + +--- + +## Progress Summary + +| Gap ID | Description | Original Status | Current Status | Tests Fixed | +|--------|-------------|----------------|----------------|-------------| +| B1+B16 | ArgumentMap NPE (BUCKET_NULLABLE) | ~86 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B2 | AggregateFunction → Function cast | ~29 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B7 | Relevance function aliases | ~14 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B8+B19 | visitLimit missing | ~10 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B9 | Window functions (ROW_NUMBER, RANK, DENSE_RANK) | ~8 failures | ✅ **FIXED** — added to WINDOW_FUNC_MAPPING + PlanUtils.makeOver | ROW_NUMBER resolves | +| B10 | Table name resolution (dot separator) | ~6 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B11 | QUERY function | ~5 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B13 | Column name = alias instead of expression | ~6 failures | ✅ **FIXED** — alias encoding in field name + decoding in response | ~7 (ConditionalIT) | +| B14 | ISNULL / IFNULL unresolved | ~5 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B18 | typeof() type name mismatch | 1 failure | 🟡 **Partially fixed** — SQL returns correct names; text/keyword still wrong | typeof works | +| B6 | WILDCARD_QUERY | ~18 failures | 🟡 **Partially fixed** — registered but push-down not implemented | 0 (execution fails) | +| B12 | Schema type mismatch (keyword vs text) | ~5 failures | ❌ **Not fixed** — requires new UDT (M-effort) | 0 | +| B3 | Pagination unsupported | ~35 failures | ⏭️ **Skipped** (out of scope) | — | +| B4 | Table function unsupported | ~26 failures | ⏭️ **Skipped** (out of scope) | — | +| B5 | NESTED function | ~33 failures | ⏭️ **Skipped** (out of scope) | — | +| B15 | Explain format | ~3 failures | ⏭️ **Skipped** (not SELECT-focused) | — | +| B17 | Pagination fallback tests | ~5 failures | ⏭️ **Skipped** (not SELECT-focused) | — | + +--- + +## New Fixes Applied (this session) + +### Fix 1: Window Function ORDER BY Support +**Files**: `CalciteRexNodeVisitor.java`, `PlanUtils.java` + +- `visitWindowFunction` now converts `node.getSortList()` to Calcite orderKeys with ASC/DESC/NULLS handling +- Added RANK and DENSE_RANK cases to `PlanUtils.makeOver` switch +- Added `makeOver` overload with `distinct` parameter for `COUNT(DISTINCT x) OVER(...)` +- Pure window functions (ROW_NUMBER, RANK, DENSE_RANK) bypass `aggFunctionRegistry` validation + +### Fix 2: Column Name/Alias Encoding (B13) +**Files**: `CalciteRexNodeVisitor.java`, `OpenSearchExecutionEngine.java` + +- For SQL queries with AS-alias, encodes `exprName\0alias` in Calcite field name +- Response builder decodes and populates both `Column.name` and `Column.alias` +- Fixed NPE in ANY-type lookup when alias is present + +### Fix 3: typeof() QueryType-Aware (B18) +**Files**: `CalcitePlanContext.java`, `PPLFuncImpTable.java` + +- Added `ThreadLocal` to `CalcitePlanContext` with cleanup in finally block +- `typeof()` uses `getCurrentQueryType()` instead of hardcoded PPL +- SQL queries return OpenSearch type names (LONG, INTEGER); PPL keeps BIGINT, INT + +### Fix 4: AVG Type Validation Bypass for SQL +**File**: `PPLFuncImpTable.java` + +- SQL queries bypass strict PPL type validation for aggregation functions +- Allows AVG on TIMESTAMP/DATE/TIME to pass validation (Calcite handles coercion) + +### Fix 5: AVG on Temporal — Cast to BIGINT (uncommitted) +**File**: `PPLFuncImpTable.java` + +- For SQL queries, detects temporal-typed AVG operands and casts to BIGINT before AVG +- Resolves "Error while preparing plan" — AVG now executes +- Result type is `double` instead of `timestamp` (partial fix — cast-back not implemented) + +### Fix 6: wildcard_query + percentile Registration +**Files**: `PPLBuiltinOperators.java`, `BuiltinFunctionName.java`, `PPLFuncImpTable.java` + +- Registered WILDCARD_QUERY and WILDCARDQUERY operators +- Added percentile/percentile_approx to WINDOW_FUNC_MAPPING + +--- + +## Current Failure Breakdown (609 tests total) + +| Root Cause | Failures | Category | Effort | +|-----------|----------|----------|--------| +| DateTimeComparisonIT type issues | 24 | AE-side | — | +| WildcardQuery push-down | 18 | SQL plugin | M | +| AggregationIT (AVG type + COUNT DISTINCT) | 16 | SQL plugin | M | +| text vs keyword type mapping | 16 | SQL plugin | M (UDT) | +| Legacy AggregationExpressionIT | 15 | Grammar gap (GROUP BY expr) | M | +| MatchIT text/keyword | 11 | SQL plugin | M (UDT) | +| DateTimeFunctionIT | 7 | AE-side | — | +| WindowFunctionIT (DISTINCT COUNT) | 7 | Pre-existing / Calcite limitation | M | +| Other (field not found, identifier, etc.) | 4 | Various | S-M | + +--- + +## Remaining Gaps by Priority + +### High Priority (SQL plugin, fixable) + +| Issue | Tests | Fix | +|-------|-------|-----| +| text vs keyword type | 27 | New EXPR_TEXT UDT or override getFamily() | +| WildcardQuery push-down | 18 | Implement relevance function push-down rule | +| AVG result type (double→timestamp) | 13 | Cast AVG result back to original temporal type | +| Post-aggregate field resolution | 11+ | Match sub-expressions against group key fields | + +### Medium Priority (pre-existing / Calcite limitations) + +| Issue | Tests | Fix | +|-------|-------|-----| +| Window DISTINCT COUNT | 7 | Calcite limitation — may need custom aggregate | +| EXISTS subquery | 2 | Grammar gap (A7) | + +### Reclassified to AE (not SQL plugin) + +| Issue | Tests | +|-------|-------| +| DATE_PART(string, string?) | 22 | +| DateTimeComparison cross-type | 24 | +| to_timestamp(string, string) | 10 | +| convert_tz(string, string, string) | 9 | +| MOD/LOG10/LN type mismatch | 8 | +| /(timestamp, bigint) | 3 | + +--- + +## Test Classes — Full Results + +### 100% Pass (21 classes) +ArithmeticFunctionIT, ConvertTZFunctionIT, DateTimeImplementationIT, LikeQueryIT, MatchPhraseIT, MatchPhrasePrefixIT, MathematicalFunctionIT, MultiMatchIT, NowLikeFunctionIT, NullLiteralIT, ObjectFieldSelectIT, PositionFunctionIT, QueryIT, QueryStringIT, RelevanceFunctionIT, ScoreQueryIT, SimpleQueryStringIT, StringLiteralIT, TextFunctionIT + +### Partial Pass (13 classes) +| Class | Pass/Total | Rate | +|-------|-----------|------| +| DateTimeComparisonIT | 167/191 | 87% | +| DateTimeFunctionIT | 67/74 | 91% | +| AggregationIT | 38/54 | 70% | +| DateTimeFormatsIT | 10/11 | 91% | +| ConditionalIT | 7/12 | 58% | +| IdentifierIT | 9/10 | 90% | +| ComplexTimestampQueryIT | 4/6 | 67% | +| QueryValidationIT | 2/5 | 40% | +| MatchIT | 3/14 | 21% | +| MatchBoolPrefixIT | 1/3 | 33% | +| SystemFunctionIT | 1/2 | 50% | +| WindowFunctionIT | 1/8 | 12% | +| MethodQueryIT (legacy) | 2/7 | 29% | + +### 0% Pass (3 classes) +| Class | Tests | Reason | +|-------|-------|--------| +| WildcardQueryIT | 18 | Push-down not implemented | +| ExistsPushdownIT | 2 | Grammar gap (EXISTS subquery) | +| AggregationExpressionIT (legacy) | 16 | GROUP BY expression field resolution | diff --git a/docs/report/sql-v2-calcite-complete-gap-table.md b/docs/report/sql-v2-calcite-complete-gap-table.md new file mode 100644 index 00000000000..d170bce056a --- /dev/null +++ b/docs/report/sql-v2-calcite-complete-gap-table.md @@ -0,0 +1,68 @@ +# SQL V2 + CalciteRelNodeVisitor: Complete Gap Table + +**Date**: 2026-05-12 +**Pass Rate**: 650 / 964 (67.4%) with S-effort fixes + grammar extensions applied +**Branch**: https://github.com/dai-chen/sql-1/tree/sql-v2-calcite-gap-analysis + +## Complete SQL Gap Table + +| # | Gap | Failures | Example Query | SQL-Only? | Origin | Status | +|---|-----|----------|---------------|-----------|--------|--------| +| **Grammar Gaps (cannot parse)** | | | | | | | +| 1 | SHOW/DESCRIBE with unquoted identifiers | 11 | `SHOW TABLES LIKE opensearch-sql_test_index_account` | ✅ SQL-only | SQL Plugin | Grammar expects STRING_LITERAL | +| 2 | Comma-join / nested FROM (PartiQL) | 5 | `SELECT * FROM nested_type e, e.message m` | ✅ SQL-only | SQL Plugin | Grammar gap | +| 3 | PERCENTILES / STATS / EXTENDED_STATS | 5 | `SELECT PERCENTILES(age, 25.0, 50.0) FROM account` | ✅ SQL-only | SQL Plugin | Legacy functions not in grammar | +| 4 | Index names with dots | 3 | `SELECT * FROM test-logs-2025.01.01` | ✅ SQL-only | SQL Plugin | Use backticks | +| 5 | Legacy method syntax (REGEXP_QUERY, wildcardQuery) | 3 | `SELECT * FROM account WHERE address=REGEXP_QUERY('.*')` | ✅ SQL-only | SQL Plugin | Legacy syntax | +| 6 | Qualified wildcard `m.*` | 2 | `SELECT m.* FROM nested_type e, e.message m` | ✅ SQL-only | SQL Plugin | Grammar gap | +| 7 | Index/type syntax | 1 | `SELECT * FROM account/wrongType` | ✅ SQL-only | SQL Plugin | Legacy syntax | +| 8 | Subquery EOF parse | 1 | `SELECT col FROM (SELECT * FROM table)` with pagination | ✅ SQL-only | SQL Plugin | Edge case | +| 9 | Scalar subquery in SELECT | 0 (no test) | `SELECT (SELECT MAX(x) FROM t2) FROM t1` | ✅ SQL-only | SQL Plugin | Not in grammar; visitor ready | +| 10 | EXCEPT/MINUS | 0 (throws) | `SELECT a FROM t1 EXCEPT SELECT a FROM t2` | ✅ SQL-only | SQL Plugin | Grammar added, visitor throws | +| | | | | | | | +| **Visitor/Execution Gaps (parses but fails)** | | | | | | | +| 11 | Field [X] not found in aggregation Project | 36 | `SELECT MAX(balance), AVG(age) FROM account GROUP BY gender` | ✅ SQL-only | SQL Plugin | SQL wraps agg in Project; PPL doesn't | +| 12 | NESTED function not registered | 33 | `SELECT nested(message.author.name) FROM multi_nested` | ❌ Shared | SQL Plugin + AE | PPL also uses nested() | +| 13 | vectorSearch() table function | 30 | `SELECT * FROM vectorSearch(table='t', field='f', vector='[1]', option='k=5')` | ✅ SQL-only | AE | AE needs TableFunctionScan support | +| 14 | Pagination schema mismatch | 26 | `SELECT * FROM bank` (with fetchSize=5) | ❌ Shared | SQL Plugin | Calcite path has no pagination | +| 15 | RexNode NPE (nested wildcard) | 20 | `SELECT nested(message.*) FROM nested_type` | ❌ Shared | SQL Plugin | Part of NESTED gap | +| 16 | WILDCARD_QUERY not registered | 18 | `SELECT body FROM test WHERE wildcard_query(KeywordBody, 't*')` | ✅ SQL-only | SQL Plugin + AE | Needs function registration + AE DSL mapping | +| 17 | AVG on temporal types rejected | 13 | `SELECT AVG(datetime1) FROM calcs` | ❌ Shared | SQL Plugin | Type checker too strict | +| 18 | Schema type mismatch (keyword vs text) | 11 | `SELECT firstname FROM accounts WHERE match(firstname, 'Amber')` | ✅ SQL-only | SQL Plugin | Response metadata issue | +| 19 | Explain format assertions | 11 | `EXPLAIN SELECT * FROM bank WHERE age > 30` | ✅ SQL-only | SQL Plugin | Tests expect V2 explain format | +| 20 | JOIN explain alias mismatch | 11 | `EXPLAIN SELECT a.id, b.name FROM order a JOIN bank b ON a.id = b.id` | ✅ SQL-only | SQL Plugin | Alias not propagated in explain | +| 21 | Column name = alias instead of expression | 10 | `SELECT NULLIF(lastname, 'unknown') AS name FROM accounts` | ✅ SQL-only | SQL Plugin | Calcite uses alias as column label | +| 22 | Window: ROW_NUMBER still failing | 3 | `SELECT ROW_NUMBER() OVER(ORDER BY age) FROM bank` | ❌ Shared | SQL Plugin | Mapping added but not taking effect | +| 23 | Window: PERCENTILE over window | 3 | `SELECT PERCENTILE(balance, 50) OVER() FROM bank` | ❌ Shared | SQL Plugin | Not in WINDOW_FUNC_MAPPING | +| 24 | Pagination cursor unsupported | 5 | `SELECT * FROM bank LIMIT 5` (with fetchSize, cursor iteration) | ❌ Shared | SQL Plugin | Calcite has no cursor support | +| 25 | Field resolution misc | 3 | `SELECT birthdate FROM account` / concurrent queries | ✅ SQL-only | SQL Plugin | Same root cause as #11 | +| 26 | Error message format mismatch | 2 | Tests expect "IndexNotFoundException" but get "SyntaxCheckException" | ✅ SQL-only | SQL Plugin | Test assertion issue | +| 27 | Expected exception not thrown | 2 | Self-join without alias — Calcite handles it, test expects error | ✅ SQL-only | SQL Plugin | Calcite more lenient | +| 28 | Explain plan node IDs differ | 1 | `EXPLAIN SELECT ... ORDER BY ...` — non-deterministic rel#IDs | ✅ SQL-only | SQL Plugin | Brittle test | +| | | | | | | | +| **Code-level gaps (not surfaced by tests)** | | | | | | | +| 29 | NOT LIKE function | 0 | `SELECT * FROM t WHERE name NOT LIKE '%test%'` | ❌ Shared | SQL Plugin | Not registered in PPLFuncImpTable | +| 30 | INTERVAL compound units | 0 | `SELECT date + INTERVAL 1 DAY_HOUR FROM t` | ✅ SQL-only | SQL Plugin | PlanUtils throws UnsupportedOp | +| 31 | INTERVAL with non-literal expression | 0 | `SELECT date + INTERVAL col DAY FROM t` | ✅ SQL-only | SQL Plugin | NumberFormatException | +| 32 | `\|\|` string concatenation | 0 | `SELECT 'hello' \|\| ' world'` | ✅ SQL-only | SQL Plugin | Not in V2 grammar | +| 33 | RelationSubquery (derived table alias lost) | 0 | `SELECT * FROM (SELECT a, b FROM t) AS sub` | ✅ SQL-only | SQL Plugin | No visitRelationSubquery | + +## Summary + +| Category | Failures | SQL-Only | Shared with PPL | +|----------|----------|----------|-----------------| +| Grammar gaps | 31 | 31 | 0 | +| Visitor/execution gaps | 204 | 128 | 76 | +| Code-level (no test) | 0 | 4 | 1 | +| **Total** | **283** (+5 code) | **163** | **77** | + +## For AE Team: SQL-Specific RelNode/Operator Support Needed (Beyond PPL) + +| # | What AE Needs Beyond PPL | RelNode/Operator | Example | +|---|--------------------------|-----------------|---------| +| 1 | vectorSearch() table function | `TableFunctionScan` | `FROM vectorSearch(table='t', ...)` | +| 2 | WILDCARD_QUERY in filter | `RexCall(WILDCARD_QUERY, field, pattern)` | `WHERE wildcard_query(body, 'test*')` | +| 3 | Aggregation with expression Project | `LogicalProject` over `LogicalAggregate` with field refs | `SELECT MAX(balance+1) FROM t GROUP BY age` | +| 4 | INTERVAL compound units | `RexLiteral(INTERVAL DAY TO HOUR)` | `date + INTERVAL 1 DAY_HOUR` | + +Everything else (JOIN, UNION, IN/EXISTS, filter, sort, window, basic aggregation) produces identical RelNode types as PPL — no additional AE work needed. diff --git a/docs/report/sql-v2-calcite-gap-analysis.md b/docs/report/sql-v2-calcite-gap-analysis.md new file mode 100644 index 00000000000..d29322bcfad --- /dev/null +++ b/docs/report/sql-v2-calcite-gap-analysis.md @@ -0,0 +1,250 @@ +# SQL V2 → CalciteRelNodeVisitor: Complete Gap Analysis + +**Date**: 2026-05-12 +**Pass Rate**: 572 / 964 (59.3%) with all fallback paths disabled +**Fixes Applied**: Alias in project list, Cursor.None, REST fallback disabled +**Branch**: https://github.com/dai-chen/sql-1/tree/sql-v2-calcite-gap-analysis + +## Background + +This analysis evaluates the feasibility of connecting the SQL V2 parser/AST builder to the existing PPL Calcite planner (`CalciteRelNodeVisitor`) — bypassing the V2 Analyzer → Planner → PhysicalPlan path entirely. All fallback paths (Calcite → V2, V2 → legacy) were disabled to expose the complete set of gaps. + +The motivation is to provide data for the **Mustang SQL** design decision: integrating the SQL plugin with the Analytics Engine in OpenSearch core on Parquet index **without any fallback path to Lucene index**. One option under consideration is to reuse the V2 parser and AST builder for maximum SQL compatibility, routing the resulting AST through CalciteRelNodeVisitor (which already powers PPL V3) to produce Calcite RelNode plans. This report identifies exactly what works, what doesn't, and what effort is required to close each gap — informing whether this reuse strategy is viable or whether alternative approaches (e.g., Calcite's native SQL parser) are preferable for Mustang. + +### Alternative Path Evaluated: Calcite Native Parser + UnifiedQueryCompiler + +For comparison, we also tested routing all SQL through `RestUnifiedQueryAction` using Calcite's native `SqlParser` → `SqlToRelConverter` → `UnifiedQueryCompiler` (direct Calcite Bindable execution). Result: **11 / 1007 (1.1%) pass rate**. The test cluster crashed mid-run due to an unhandled `ExceptionInInitializerError` (fatal JVM crash from `log(0)` in Calcite Enumerable execution). Before the crash, 198 queries failed at planning (Calcite parser can't handle OpenSearch-specific functions/syntax) and 143 failed at compilation (RelRunner execution failures). This path is fundamentally not ready — it lacks error resilience, doesn't leverage OpenSearch push-down, and has far worse compatibility than the V2+CalciteRelNodeVisitor approach. + +--- + +## Category A: V2 Grammar / Parser Gaps + +These queries **cannot be parsed** by the V2 ANTLR grammar (`OpenSearchSQLParser.g4`). They fail with `SyntaxCheckException` before reaching CalciteRelNodeVisitor. + +| # | Gap | Failures | Example Query | Effort | Solution | +|---|-----|----------|---------------|--------|----------| +| A1 | **JOIN** (INNER, LEFT, cross) | 19 | `SELECT a.firstname FROM bank a INNER JOIN order b ON a.name = b.name` | L | Extend V2 grammar with JOIN clause + AstBuilder visitJoin. `Join` AST node and `CalciteRelNodeVisitor.visitJoin` already exist — only grammar + AstBuilder missing | +| A2 | **SHOW/DESCRIBE with unquoted hyphenated identifiers** | 11 | `SHOW TABLES LIKE opensearch-sql_test_index_account` | S | V2 grammar supports SHOW/DESCRIBE but `tableFilter` expects `STRING_LITERAL`. Tests pass unquoted identifiers. Fix: extend `tableFilter` to accept ID | +| A3 | **Nested FROM syntax** (PartiQL comma-join) | 7 | `SELECT * FROM nested_type e, e.message m` | M | Extend grammar FROM clause to allow comma-separated relations + nested path expressions | +| A4 | **PERCENTILES / STATS / EXTENDED_STATS** | 7 | `SELECT PERCENTILES(age, 25.0, 50.0) FROM account` | M | Tokens exist in lexer but unused in parser. Add parser rules. Note: `PERCENTILE` (singular) IS supported | +| A5 | **Legacy method syntax** (`field=FUNC('arg')`) | 5 | `SELECT * FROM account WHERE address=REGEXP_QUERY('.*')` | M | Only MATCH_QUERY/MATCHPHRASE have `altSingleFieldRelevanceFunction` rule. REGEXP_QUERY does not. Add to grammar or use standard function-call syntax | +| A6 | **Index names with dots** | 3 | `SELECT * FROM test-logs-2025.01.01` | S | Grammar splits on dots into `qualifiedName` parts. `01` fails as ident (starts with digit). Require backtick quoting (already works with backticks) | +| A7 | **IN / EXISTS / NOT EXISTS subquery** | 0 (excluded) | `WHERE age IN (SELECT age FROM bank2 WHERE age > 30)` | L | `IN` predicate only accepts expression list, not subquery. `EXISTS` token defined in lexer but unused in parser. Add `IN '(' querySpecification ')'` and `EXISTS '(' querySpecification ')'` rules | +| A8 | **Qualified wildcard** (`alias.*`) | 2 | `SELECT m.* FROM nested_type e, e.message m` | S | Add qualifiedStar rule to selectElement in grammar | +| A9 | **Multi-table FROM** (comma-join, non-nested) | 1 | `SELECT * FROM dog_index, dogs2_index` | S | Allow comma-separated table list in FROM clause | +| A10 | **date_histogram** | 1 | `SELECT count(*) FROM online GROUP BY date_histogram('field'='insert_time','fixed_interval'='4d')` | S | Add date_histogram to function name tokens in parser | +| A11 | **Index/type syntax** (`/type`) | 1 | `SELECT * FROM account/wrongType` | S | Add optional `/type` suffix to table reference rule | +| A12 | **UNION / INTERSECT / MINUS** | 0 (excluded) | `SELECT name FROM bank UNION ALL SELECT dog_name FROM dog` | L | Add set-operation rules to grammar + AST Append/Union nodes | +| A13 | **Correlated subquery** | 0 (excluded) | `WHERE EXISTS (SELECT * FROM dept d WHERE d.id = e.dept_id)` | L | Requires A7 first + decorrelation support in planner | + +**Total Category A failures: ~57 (active) + 3 hidden gaps with 0 active tests (A7, A12, A13)** + +**Note on subqueries**: V2 grammar supports derived tables (`FROM (SELECT ...) AS alias`) via the `subqueryAsRelation` rule. CalciteRelNodeVisitor has `visitSubqueryAlias` for this. However, `IN (SELECT ...)`, `EXISTS (SELECT ...)`, and correlated subqueries are not in the V2 grammar — the `EXISTS` token is defined in the lexer but unused in any parser rule, and `IN` only accepts expression lists, not nested SELECTs. The legacy `SubqueryIT` (18 tests) is entirely excluded from the build. + +--- + +## Category B: CalciteRelNodeVisitor Execution Gaps + +These queries parse successfully into AST but fail during Calcite visitor traversal or execution. + +| # | Gap | Failures | Example Query | Effort | Solution | +|---|-----|----------|---------------|--------|----------| +| B1 | **Aggregation ArgumentMap NPE** (BUCKET_NULLABLE) | 82 | `SELECT MAX(datetime1), MIN(datetime1) FROM calcs` | S | Null-check `statsArgs.get(Argument.BUCKET_NULLABLE)` at CalciteRelNodeVisitor.java:1612. SQL's `Aggregation` has empty `argExprList` (PPL sets BUCKET_NULLABLE). Default to `false`. | +| B2 | **AggregateFunction → Function cast** | 29 | `SELECT AVG(num3) OVER(PARTITION BY datetime1) FROM calcs` | S | Add `instanceof AggregateFunction` branch at CalciteRexNodeVisitor.java:564. `AggregateFunction` and `Function` are sibling classes (both extend `UnresolvedExpression`), not parent-child. | +| B3 | **Pagination unsupported** (schema mismatch + cursor) | 35 | `SELECT * FROM bank` (with fetchSize) | L | `visitPaginate()` and `visitFetchCursor()` both throw `CalciteUnsupportedException`. Non-paged goes through Calcite; paged falls back to V2 → different schemas. Implement pagination in Calcite or ensure schema consistency. | +| B4 | **Table function unsupported** | 26 | `SELECT v._id FROM vectorSearch(table='t', field='f', vector='[1.0]', option='k=5') AS v` | L | `visitTableFunction()` is a complete stub (single throw). Implement KNN scan translation. | +| B5 | **NESTED function** | 33 | `SELECT nested(message.author.name) FROM multi_nested` | L | NESTED not registered in `PPLFuncImpTable.java`. Requires special handling — not a simple function, needs unnest/correlate pattern for OpenSearch nested field access. | +| B6 | **WILDCARD_QUERY** | 18 | `SELECT body FROM wildcard WHERE wildcard_query(KeywordBody, 't*')` | M | Not registered in `PPLFuncImpTable.java`. Register and map to OpenSearch wildcard query DSL. | +| B7 | **Relevance function aliases** (MATCH_QUERY, MATCHPHRASE, MATCHQUERY) | 14 | `SELECT firstname FROM accounts WHERE match_query(lastname, 'Bates')` | S | `PPLFuncImpTable` registers MATCH, MATCH_PHRASE, MULTI_MATCH but NOT the aliases (MATCH_QUERY, MATCHQUERY, MATCHPHRASE). Add alias registrations pointing to same operators. | +| B8 | **LIMIT not applied** (visitLimit missing) | 9 | `SELECT last_day(date0) FROM calcs LIMIT 3` → returns 17 rows | S | `CalciteRelNodeVisitor` does NOT override `visitLimit()`. Default `visitChildren()` silently skips it. Add `visitLimit(Limit node, ctx)` calling `context.relBuilder.limit(offset, count)`. | +| B9 | **Window functions** (ROW_NUMBER, RANK, DENSE_RANK) | 8 | `SELECT age, ROW_NUMBER() OVER(ORDER BY age DESC) FROM bank` | S | `WINDOW_FUNC_MAPPING` in `BuiltinFunctionName.java` is missing ranking functions. `PlanUtils.makeOver()` already has `case ROW_NUMBER` handler. Just add to mapping. | +| B10 | **Table name resolution** (dot = schema separator) | 6 | `SELECT __age FROM test.twounderscores` → `Table 'test.twounderscores' not found` | S | `visitRelation` passes `QualifiedName.getParts()` (split on dots) to `relBuilder.scan()`. Calcite interprets `["test","twounderscores"]` as schema.table. Fix: pass resolved name as single-element list. | +| B11 | **QUERY function** | 5 | `SELECT Id FROM beer WHERE query('Tags:taste OR Body:taste')` | M | Not registered in `PPLFuncImpTable.java`. Register and map to OpenSearch query_string DSL. | +| B12 | **Schema type mismatch** (keyword vs text) | 5 | `SELECT phrase FROM phrase WHERE match_bool_prefix(phrase, 'quick')` — schema says `keyword` not `text` | S | Calcite schema resolves field type from mapping incorrectly. Fix type resolution in OpenSearchSchema. | +| B13 | **Column name = alias instead of expression** | 6 | `SELECT NULLIF(lastname, 'unknown') AS name` → schema name is `name` not `NULLIF(...)` | S | Calcite uses the AS alias as column label. V2 uses expression text. Align behavior in response formatter. | +| B14 | **ISNULL / IFNULL unresolved** | 5 | `SELECT ISNULL(lastname) FROM accounts` | S | ISNULL (1-arg boolean) not registered. IFNULL fails with NULL literal (UNDEFINED type). Register + fix type resolution. | +| B15 | **Explain format** | 3 | `EXPLAIN SELECT age FROM account WHERE age IS NOT NULL` | S | Calcite explain outputs different JSON structure. Test regex doesn't match. Fix test or add V2-compatible explain format. | +| B16 | **ArgumentMap NPE** (systemic — PI, GROUP BY, HAVING) | 4 | `SELECT PI()` / `SELECT col FROM t GROUP BY col HAVING COUNT(1) > 0` | S | Same root cause as B1 — ArgumentMap access without null-check in multiple visitor methods. Fix alongside B1. | +| B17 | **Pagination fallback tests** | 5 | `SELECT * FROM bank LIMIT 5` (with fetchSize) | M | Tests verify fallback behavior. With fallback disabled, they fail. Test-infrastructure issue. | +| B18 | **Type name mismatch** (SQL standard vs OpenSearch) | 1 | `SELECT typeof(age)` → returns `BIGINT` instead of `LONG` | S | Calcite uses SQL standard type names. Map back to OpenSearch names in typeof() implementation. | +| B19 | **LIMIT off-by-one** | 1 | `SELECT point FROM geopoints LIMIT 5` → returns 6 rows | S | Off-by-one in SystemLimit or LIMIT application. Related to B8 (visitLimit missing). | + +**Total Category B failures: ~295** + +--- + +## Summary Statistics + +| Category | Active Failures | Hidden Gaps (0 tests) | Key Blocker | +|----------|----------------|----------------------|-------------| +| A (Grammar) | ~57 | 3 (IN/EXISTS, UNION, correlated) | JOINs (19) — visitor ready, grammar missing | +| B (Visitor) | ~295 | — | ArgumentMap NPE (86), NESTED (33), AggFunc cast (29) | + +## Effort Legend + +- **S** (Small): < 1 day. Null-check, register function alias, fix config. +- **M** (Medium): 1–3 days. New visitor method, function implementation with DSL mapping. +- **L** (Large): 3+ days. Grammar extension, new execution path, architectural change. + +## Quick Wins (S effort, high impact) + +| Fix | Effort | Tests Unlocked | File | +|-----|--------|----------------|------| +| B1+B16: Null-check BUCKET_NULLABLE in visitAggregation | S | ~86 | CalciteRelNodeVisitor.java:1612 | +| B2: instanceof AggregateFunction in visitWindowFunction | S | ~29 | CalciteRexNodeVisitor.java:564 | +| B8+B19: Add visitLimit() override | S | ~10 | CalciteRelNodeVisitor.java (new method) | +| B9: Add ROW_NUMBER/RANK/DENSE_RANK to WINDOW_FUNC_MAPPING | S | ~8 | BuiltinFunctionName.java | +| B7: Register MATCH_QUERY/MATCHQUERY/MATCHPHRASE aliases | S | ~14 | PPLFuncImpTable.java | +| B10: Pass resolved name as single-element list to scan() | S | ~6 | CalciteRelNodeVisitor.java visitRelation | +| B14: Register ISNULL + fix IFNULL null-type | S | ~5 | PPLFuncImpTable.java | +| **Total quick wins** | **~2 days** | **~158 tests** | | + +## Architectural Notes + +1. **JOIN path is ready**: `Join` AST node exists, `CalciteRelNodeVisitor.visitJoin()` is implemented. Only the V2 SQL grammar rule + AstBuilder visitor method are needed to connect SQL JOINs to the existing Calcite path. + +2. **Pagination is the biggest L-effort gap**: B3 (35 failures) requires implementing cursor-based pagination in the Calcite execution engine — a fundamentally new capability. + +3. **Function registration pattern**: Most unresolved function gaps (B6, B7, B11, B14) follow the same pattern — register in `PPLFuncImpTable.java` and map to appropriate Calcite SqlOperator or custom implementation. + +--- + +## Appendix: Fixes Applied for This Analysis + +The following minimal fixes were applied to enable the gap analysis. They are intentionally temporary and non-production-quality. + +### Fix 1: Route SQL through CalciteRelNodeVisitor + +**File**: `core/src/main/java/org/opensearch/sql/executor/QueryService.java` + +```java +// Before: +private boolean shouldUseCalcite(QueryType queryType) { + return isCalciteEnabled(settings) && queryType == QueryType.PPL; +} + +// After: +private boolean shouldUseCalcite(QueryType queryType) { + return isCalciteEnabled(settings) + && (queryType == QueryType.PPL || queryType == QueryType.SQL); +} +``` + +### Fix 2: Enable Calcite + disable fallback in IT tests + +**File**: `integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java` + +```java +// Before: +protected void init() throws Exception { + disableCalcite(); + increaseMaxCompilationsRate(); +} + +// After: +protected void init() throws Exception { + enableCalcite(); + disallowCalciteFallback(); + increaseMaxCompilationsRate(); +} +``` + +### Fix 3: Handle Alias expression in project list + +**File**: `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java` + +Added `case Alias` to the switch in `expandProjectFields()`: + +```java +case Alias alias -> { + expandedFields.add(rexVisitor.analyze(alias, context)); +} +``` + +This dispatches to the existing `CalciteRexNodeVisitor.visitAlias()` which correctly unwraps the delegated expression and applies the alias name. + +### Fix 4: Return Cursor.None instead of null + +**File**: `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java` + +```java +// Before: +QueryResponse response = new QueryResponse(schema, values, null); + +// After: +QueryResponse response = new QueryResponse(schema, values, Cursor.None); +``` + +### Fix 5: Disable REST-level V2 → legacy fallback + +**File**: `legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java` + +Disabled the `SyntaxCheckException` catch in `fallBackListener.onFailure()` to surface all grammar-level failures instead of silently routing to legacy engine. + +--- + +## Appendix B: Function Registration Status + +**278 total functions** in `BuiltinFunctionName`. Summary: + +| Status | Count | Details | +|--------|-------|---------| +| PPLFuncImpTable (registered in Calcite) | 237 | All math, string, date, arithmetic, boolean, aggregate, collection, JSON, crypto | +| visitCast (handled by cast visitor) | 14 | All CAST_TO_* type conversions | +| makeOver (window function utility) | 2 | ROW_NUMBER, NTH_VALUE | +| Special visitor method | 2 | HIGHLIGHT, SCORE (throw unsupported) | +| External (OpenSearchExecutionEngine) | 2 | GEOIP, DISTINCT_COUNT_APPROX | +| GAP IN REPORT (missing, identified) | 13 | See below | +| NOT REGISTERED (missing, not in report) | 8 | See below | + +### Functions Identified as Gaps (13) + +| Function | SQL Name | Gap ID | +|----------|----------|--------| +| NESTED | `nested` | B5 | +| ISNULL | `isnull` | B14 | +| RANK | `rank` | B9 | +| DENSE_RANK | `dense_rank` | B9 | +| MATCHPHRASE | `matchphrase` | B7 | +| MATCHPHRASEQUERY | `matchphrasequery` | B7 | +| QUERY | `query` | B11 | +| MATCH_QUERY | `match_query` | B7 | +| MATCHQUERY | `matchquery` | B7 | +| MULTIMATCH | `multimatch` | B7 | +| MULTIMATCHQUERY | `multimatchquery` | B7 | +| WILDCARDQUERY | `wildcardquery` | B6 | +| WILDCARD_QUERY | `wildcard_query` | B6 | + +### Functions NOT REGISTERED (8) — Not in Gap Report + +| Function | SQL Name | Impact | Notes | +|----------|----------|--------|-------| +| TAN | `tan` | Low | `SqlStdOperatorTable.TAN` exists, trivial to add | +| NOT_LIKE | `not like` | Negligible | Handled by `NOT` + `LIKE` composition at query level | +| SCOREQUERY | `scorequery` | Low | Legacy alias for SCORE (which throws unsupported) | +| SCORE_QUERY | `score_query` | Low | Legacy alias for SCORE | +| BRAIN | `brain` | None for SQL | Internal PPL patterns command function | +| INTERVAL | `interval` | None | Handled implicitly by date arithmetic literals | +| NONE | `none` | None for SQL | PPL convert command no-op | +| INTERNAL_UNCOLLECT_PATTERNS | `uncollect_patterns` | None | Defined but never referenced anywhere | + +### Registered Functions by Category (237 in PPLFuncImpTable) + +| Category | Count | Examples | +|----------|-------|---------| +| Math | 26 | ABS, CEIL, FLOOR, ROUND, SQRT, LOG, EXP, POW, PI, RAND | +| Trigonometric | 11 | ACOS, ASIN, ATAN, ATAN2, COS, COSH, COT, SIN, SINH, DEGREES, RADIANS | +| Arithmetic | 11 | +, -, *, /, %, MOD, ADD, SUBTRACT, MULTIPLY, DIVIDE | +| Boolean/Comparison | 12 | AND, OR, NOT, XOR, =, !=, <, <=, >, >=, LIKE, ILIKE | +| String | 23 | CONCAT, SUBSTRING, TRIM, UPPER, LOWER, LENGTH, REPLACE, REVERSE, LOCATE | +| Date/Time | 63 | NOW, CURDATE, DATE_FORMAT, DATEDIFF, TIMESTAMPADD, EXTRACT, YEAR, MONTH, DAY | +| Aggregate | 18 | AVG, SUM, COUNT, MIN, MAX, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP, PERCENTILE_APPROX | +| Conditional | 8 | IF, IFNULL, NULLIF, COALESCE, IS_NULL, IS_NOT_NULL, IS_PRESENT, IS_EMPTY | +| Collection | 20 | ARRAY, SPLIT, MVAPPEND, MVJOIN, MVINDEX, JSON_EXTRACT, JSON_KEYS | +| JSON | 12 | JSON_VALID, JSON_OBJECT, JSON_ARRAY, JSON_SET, JSON_DELETE | +| Binning | 4 | SPAN_BUCKET, WIDTH_BUCKET, MINSPAN_BUCKET, RANGE_BUCKET | +| Crypto | 3 | MD5, SHA1, SHA2 | +| Relevance | 7 | MATCH, MATCH_PHRASE, MULTI_MATCH, SIMPLE_QUERY_STRING, QUERY_STRING, MATCH_BOOL_PREFIX, MATCH_PHRASE_PREFIX | +| Convert/Type | 10 | AUTO, NUM, CTIME, MKTIME, TYPEOF, TOSTRING, TONUMBER | +| Internal | 9 | ITEM, PATTERN_PARSER, GROK, PARSE, REGEXP_REPLACE | +| Other | 2 | CIDRMATCH, SPAN | diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 099b2f7e0cb..888ff7d6812 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -11,6 +11,8 @@ import static org.opensearch.sql.legacy.plugin.RestSqlAction.EXPLAIN_API_ENDPOINT; import static org.opensearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; import static org.opensearch.sql.ppl.PPLIntegTestCase.disableCalcite; +import static org.opensearch.sql.ppl.PPLIntegTestCase.disallowCalciteFallback; +import static org.opensearch.sql.ppl.PPLIntegTestCase.enableCalcite; import com.google.common.base.Strings; import java.io.IOException; @@ -193,7 +195,8 @@ protected void resetMaxResultWindow(String indexName) throws IOException { /** Provide for each test to load test index, data and other setup work */ protected void init() throws Exception { - disableCalcite(); + enableCalcite(); + disallowCalciteFallback(); increaseMaxCompilationsRate(); } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java index 21badf79412..a54c53e9a7f 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java @@ -131,11 +131,8 @@ public void onResponse(T response) { @Override public void onFailure(Exception e) { - if (e instanceof SyntaxCheckException || e instanceof UnsupportedCursorRequestException) { - fallBackHandler.accept(channel, e); - } else { - next.onFailure(e); - } + // Disable fallback to legacy — surface all failures for gap analysis + next.onFailure(e); } }; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 32b7891d344..96cc14e0f69 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -53,6 +53,7 @@ import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine.Schema.Column; import org.opensearch.sql.executor.Explain; +import org.opensearch.sql.executor.pagination.Cursor; import org.opensearch.sql.executor.pagination.PlanSerializer; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -291,11 +292,14 @@ private QueryResponse buildResultSet( Map row = new LinkedHashMap(); // Loop through each column for (int i = 1; i <= columnCount; i++) { - String columnName = metaData.getColumnName(i); - Object value = resultSet.getObject(columnName); + String rawColumnName = metaData.getColumnName(i); + // Use alias as row key if encoded (matches getColumnName in QueryResult) + int sepIdx = rawColumnName.indexOf('\0'); + String rowKey = sepIdx >= 0 ? rawColumnName.substring(sepIdx + 1) : rawColumnName; + Object value = resultSet.getObject(rawColumnName); Object converted = processValue(value, fieldTypes.get(i - 1)); ExprValue exprValue = ExprValueUtils.fromObjectValue(converted); - row.put(columnName, exprValue); + row.put(rowKey, exprValue); } values.add(ExprTupleValue.fromExprValueMap(row)); } @@ -303,6 +307,14 @@ private QueryResponse buildResultSet( List columns = new ArrayList<>(metaData.getColumnCount()); for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); + // Decode expression name and alias encoded by CalciteRexNodeVisitor.visitAlias + // Format: "exprName\0alias" for SQL queries with AS-alias + String alias = null; + int sep = columnName.indexOf('\0'); + if (sep >= 0) { + alias = columnName.substring(sep + 1); + columnName = columnName.substring(0, sep); + } RelDataType fieldType = fieldTypes.get(i - 1); // TODO: Correct this after fixing issue github.com/opensearch-project/sql/issues/3751 // The element type of struct and array is currently set to ANY. @@ -310,7 +322,9 @@ private QueryResponse buildResultSet( ExprType exprType; if (fieldType.getSqlTypeName() == SqlTypeName.ANY) { if (!values.isEmpty()) { - exprType = values.getFirst().tupleValue().get(columnName).type(); + // Row map is keyed by alias (if present) or columnName + String lookupKey = alias != null ? alias : columnName; + exprType = values.getFirst().tupleValue().get(lookupKey).type(); } else { // Using UNDEFINED instead of UNKNOWN to avoid throwing exception exprType = ExprCoreType.UNDEFINED; @@ -318,10 +332,10 @@ private QueryResponse buildResultSet( } else { exprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(fieldType); } - columns.add(new Column(columnName, null, exprType)); + columns.add(new Column(columnName, alias, exprType)); } Schema schema = new Schema(columns); - QueryResponse response = new QueryResponse(schema, values, null); + QueryResponse response = new QueryResponse(schema, values, Cursor.None); return response; } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 6b34507eacc..aa0c4f383ca 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -54,7 +54,9 @@ dmlStatement // Primary DML Statements selectStatement - : querySpecification # simpleSelect + : querySpecification # simpleSelect + | querySpecification UNION ALL? querySpecification (UNION ALL? querySpecification)* # unionSelect + | querySpecification EXCEPT querySpecification # exceptSelect ; adminStatement @@ -104,10 +106,21 @@ selectElement ; fromClause - : FROM relation (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY + : FROM relation joinClause* (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY ; +joinClause + : joinType? JOIN relation (ON expression)? + ; + +joinType + : INNER + | LEFT OUTER? + | RIGHT OUTER? + | CROSS + ; + relation : tableName (AS? alias)? # tableAsRelation | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation @@ -302,6 +315,7 @@ predicate | left = predicate NOT? LIKE right = predicate # likePredicate | left = predicate REGEXP right = predicate # regexpPredicate | predicate NOT? IN '(' expressions ')' # inPredicate + | predicate NOT? IN '(' selectStatement ')' # inSubqueryPredicate ; expressions @@ -313,6 +327,7 @@ expressionAtom | columnName # fullColumnNameExpressionAtom | functionCall # functionCallExpressionAtom | LR_BRACKET expression RR_BRACKET # nestedExpressionAtom + | EXISTS '(' selectStatement ')' # existsExpressionAtom | left = expressionAtom mathOperator = (STAR | SLASH | MODULE) right = expressionAtom # mathExpressionAtom | left = expressionAtom mathOperator = (PLUS | MINUS) right = expressionAtom # mathExpressionAtom ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/SQLService.java b/sql/src/main/java/org/opensearch/sql/sql/SQLService.java index d51fa1c898d..1a5ead6d3f8 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/SQLService.java +++ b/sql/src/main/java/org/opensearch/sql/sql/SQLService.java @@ -9,7 +9,11 @@ import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.Join; +import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; import org.opensearch.sql.executor.ExecutionEngine.QueryResponse; @@ -20,6 +24,7 @@ import org.opensearch.sql.sql.antlr.SQLSyntaxParser; import org.opensearch.sql.sql.domain.SQLQueryRequest; import org.opensearch.sql.sql.parser.AstBuilder; +import org.opensearch.sql.sql.parser.AstExpressionBuilder; import org.opensearch.sql.sql.parser.AstStatementBuilder; /** SQL service. */ @@ -95,7 +100,92 @@ private AbstractPlan plan( Statement statement = cst.accept( new AstStatementBuilder( - new AstBuilder(request.getQuery()), + new AstBuilder(request.getQuery()) { + @Override + protected AstExpressionBuilder createExpressionBuilder() { + final var planBuilder = this; + return new AstExpressionBuilder() { + @Override + public UnresolvedExpression visitInSubqueryPredicate( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser + .InSubqueryPredicateContext + ctx) { + UnresolvedExpression field = visit(ctx.predicate()); + UnresolvedPlan subquery = planBuilder.visit(ctx.selectStatement()); + UnresolvedExpression inSub = + new org.opensearch.sql.ast.expression.subquery.InSubquery( + java.util.List.of(field), subquery); + return ctx.NOT() != null + ? org.opensearch.sql.ast.dsl.AstDSL.not(inSub) + : inSub; + } + + @Override + public UnresolvedExpression visitExistsExpressionAtom( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser + .ExistsExpressionAtomContext + ctx) { + UnresolvedPlan subquery = planBuilder.visit(ctx.selectStatement()); + return new org.opensearch.sql.ast.expression.subquery.ExistsSubquery( + subquery); + } + }; + } + + @Override + public UnresolvedPlan visitJoinClause( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext + ctx) { + UnresolvedPlan rightRelation = visit(ctx.relation()); + Join.JoinType joinType = resolveJoinType(ctx.joinType()); + java.util.Optional joinCondition = + ctx.expression() != null + ? java.util.Optional.of(visitAstExpression(ctx.expression())) + : java.util.Optional.empty(); + return new Join( + rightRelation, + java.util.Optional.empty(), + java.util.Optional.empty(), + joinType, + joinCondition, + new Join.JoinHint(), + java.util.Optional.empty(), + Argument.ArgumentMap.empty()); + } + + private Join.JoinType resolveJoinType( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinTypeContext + ctx) { + if (ctx == null) return Join.JoinType.INNER; + if (ctx.LEFT() != null) return Join.JoinType.LEFT; + if (ctx.RIGHT() != null) return Join.JoinType.RIGHT; + if (ctx.CROSS() != null) return Join.JoinType.CROSS; + return Join.JoinType.INNER; + } + + @Override + public UnresolvedPlan visitUnionSelect( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.UnionSelectContext + ctx) { + java.util.List plans = + ctx.querySpecification().stream() + .map(this::visit) + .collect(java.util.stream.Collectors.toList()); + UnresolvedPlan first = plans.remove(0); + org.opensearch.sql.ast.tree.Union union = + new org.opensearch.sql.ast.tree.Union(plans, 0); + union.attach(first); + return union; + } + + @Override + public UnresolvedPlan visitExceptSelect( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ExceptSelectContext + ctx) { + throw new org.opensearch.sql.common.antlr.SyntaxCheckException( + "EXCEPT is not yet supported. Falling back to legacy engine."); + } + }, AstStatementBuilder.StatementBuilderContext.builder() .isExplain(isExplainRequest) .fetchSize(request.getFetchSize()) diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index 5250ab7fb0f..659e7140232 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.Locale; import java.util.Optional; -import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; @@ -32,6 +31,7 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.DescribeRelation; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.Relation; @@ -50,10 +50,18 @@ import org.opensearch.sql.sql.parser.context.ParsingContext; /** Abstract syntax tree (AST) builder. */ -@RequiredArgsConstructor public class AstBuilder extends OpenSearchSQLParserBaseVisitor { - private final AstExpressionBuilder expressionBuilder = new AstExpressionBuilder(); + private final AstExpressionBuilder expressionBuilder = createExpressionBuilder(); + + /** Factory method for expression builder. Override to provide subquery-capable builder. */ + protected AstExpressionBuilder createExpressionBuilder() { + return new AstExpressionBuilder(); + } + + public AstBuilder(String query) { + this.query = query; + } /** Parsing context stack that contains context for current query parsing. */ private final ParsingContext context = new ParsingContext(); @@ -139,6 +147,13 @@ public UnresolvedPlan visitLimitClause(OpenSearchSQLParser.LimitClauseContext ct public UnresolvedPlan visitFromClause(FromClauseContext ctx) { UnresolvedPlan result = visit(ctx.relation()); + // Let ANTLR visitor dispatch handle each joinClause + for (var joinCtx : ctx.joinClause()) { + UnresolvedPlan joinPlan = visit(joinCtx); + ((Join) joinPlan).attach(result); + result = joinPlan; + } + if (ctx.whereClause() != null) { result = visit(ctx.whereClause()).attach(result); } @@ -163,6 +178,24 @@ public UnresolvedPlan visitFromClause(FromClauseContext ctx) { return result; } + @Override + public UnresolvedPlan visitJoinClause(OpenSearchSQLParser.JoinClauseContext ctx) { + throw new SyntaxCheckException( + "JOIN is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitUnionSelect(OpenSearchSQLParser.UnionSelectContext ctx) { + throw new SyntaxCheckException( + "UNION is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitExceptSelect(OpenSearchSQLParser.ExceptSelectContext ctx) { + throw new SyntaxCheckException( + "EXCEPT is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + /** * Ensure NESTED function is not used in HAVING clause and fallback to legacy engine. Can remove * when support is added for NESTED function in HAVING clause. @@ -261,7 +294,7 @@ protected UnresolvedPlan aggregateResult(UnresolvedPlan aggregate, UnresolvedPla return nextResult != null ? nextResult : aggregate; } - private UnresolvedExpression visitAstExpression(ParseTree tree) { + protected UnresolvedExpression visitAstExpression(ParseTree tree) { return expressionBuilder.visit(tree); } diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 346ef6660d7..0058cc4369c 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -290,6 +290,20 @@ public UnresolvedExpression visitInPredicate(InPredicateContext ctx) { return ctx.NOT() != null ? AstDSL.not(in) : in; } + @Override + public UnresolvedExpression visitInSubqueryPredicate( + OpenSearchSQLParser.InSubqueryPredicateContext ctx) { + throw new org.opensearch.sql.common.antlr.SyntaxCheckException( + "IN subquery is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedExpression visitExistsExpressionAtom( + OpenSearchSQLParser.ExistsExpressionAtomContext ctx) { + throw new org.opensearch.sql.common.antlr.SyntaxCheckException( + "EXISTS is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + @Override public UnresolvedExpression visitAndExpression(AndExpressionContext ctx) { return new And(visit(ctx.left), visit(ctx.right));