From 17ef3ea902b3760d7930d21a736a96ce68d67251 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Fri, 5 Jun 2026 14:55:35 +0000 Subject: [PATCH 1/5] Support global sort and improve UDF overload resolution in Beam SQL - Enable ORDER BY without LIMIT (global sort) in BeamSortRel by sorting in-memory. - Add AssertSorted test helper and testOrderBy_noLimit to verify global sort. - Map common Java classes to Calcite SqlTypeName in CalciteUtils.sqlTypeWithAutoCast, and add testSqlTypeWithAutoCast. - Prioritize overloaded methods with maximum parameter count in UdfImpl lookup, and add UdfImplTest. - Update LazyAggregateCombineFnTest to expect SQL BIGINT type instead of Java Long class. TAG=agy CONV=0df243da-2867-4795-9889-6334ba7d1599 --- .../beam/sdk/extensions/sql/impl/UdfImpl.java | 15 +++-- .../extensions/sql/impl/rel/BeamSortRel.java | 57 +++++++++++++--- .../sql/impl/utils/CalciteUtils.java | 53 ++++++++++++++- .../sql/impl/LazyAggregateCombineFnTest.java | 5 +- .../sdk/extensions/sql/impl/UdfImplTest.java | 65 +++++++++++++++++++ .../sql/impl/rel/BeamSortRelTest.java | 43 ++++++++++++ .../sql/impl/utils/CalciteUtilsTest.java | 63 ++++++++++++++++++ 7 files changed, 285 insertions(+), 16 deletions(-) create mode 100644 sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/UdfImplTest.java diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/UdfImpl.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/UdfImpl.java index 7ebd3faea782..63cd3c90419f 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/UdfImpl.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/UdfImpl.java @@ -70,17 +70,24 @@ public static Function create(Method method) { } /* - * Finds a method in a given class by name. + * Finds a method in a given class by name. 
In case of overloaded methods with the same name, + * this prioritizes the overload with the maximum number of parameters. This ensures Calcite + * can resolve optional/default trailing parameters correctly when binding UDF overloads. + * * @param clazz class to search method in * @param name name of the method to find - * @return the first method with matching name or null when no method found + * @return the matching method with the highest parameter count or null when no method found */ static @Nullable Method findMethod(Class clazz, String name) { + Method bestMethod = null; for (Method method : clazz.getMethods()) { if (method.getName().equals(name) && !method.isBridge()) { - return method; + if (bestMethod == null + || method.getParameterTypes().length > bestMethod.getParameterTypes().length) { + bestMethod = method; + } } } - return null; + return bestMethod; } } diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSortRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSortRel.java index aaa4d66011a6..23c39f0d6c64 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSortRel.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSortRel.java @@ -40,6 +40,7 @@ import org.apache.beam.sdk.state.ValueState; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.Flatten; +import org.apache.beam.sdk.transforms.GroupByKey; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Top; @@ -74,15 +75,12 @@ *
<pre>{@code
  * SELECT * FROM t ORDER BY id DESC LIMIT 10;
  * SELECT * FROM t ORDER BY id DESC LIMIT 10 OFFSET 5;
- * }</pre>
- *
- * <p>but an ORDER BY without a LIMIT is NOT supported. For example, the following will throw an
- * exception:
- *
- * <pre>{@code
  * SELECT * FROM t ORDER BY id DESC;
  * }</pre>
  *
+ * <p>Note: ORDER BY without a LIMIT is supported by keying all rows to a single key and sorting
+ * them in memory. This can be memory-intensive and may fail for large datasets.
+ *
  *

Constraints

* *