diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java index 556388c71914..9988a0294ad7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java @@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqualNS; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.ql.udf.generic.NonNullableReturnTypeUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -135,12 +136,20 @@ public RelDataType getReturnType(FunctionInfo fi, List inputs) inputsOIs[i] = createObjectInspector(inputs.get(i)); } // 2) Initialize and obtain return type - ObjectInspector oi = fi.getGenericUDF() != null ? - fi.getGenericUDF().initializeAndFoldConstants(inputsOIs) : + GenericUDF genericUDF = fi.getGenericUDF(); + ObjectInspector oi = genericUDF != null ? + genericUDF.initializeAndFoldConstants(inputsOIs) : fi.getGenericUDTF().initialize(inputsOIs); // 3) Convert to RelDataType - return TypeConverter.convert( + RelDataType returnType = TypeConverter.convert( TypeInfoUtils.getTypeInfoFromObjectInspector(oi), rexBuilder.getTypeFactory()); + // Hive has no concept of non-nullable types, but some UDFs (e.g. IS NULL) always return + // non-nullable BOOLEAN, which Calcite's RexSimplify asserts via validateStrongPolicy. 
+ if (genericUDF instanceof NonNullableReturnTypeUDF) { + returnType = rexBuilder.getTypeFactory().createTypeWithNullability(returnType, false); + } + + return returnType; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java index 3cb0e8ea5a48..99a64a9aeb2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java @@ -37,7 +37,7 @@ value = "_FUNC_ a - Returns true if a is not NULL and false otherwise") @VectorizedExpressions({IsNotNull.class, SelectColumnIsNotNull.class}) @NDV(maxNdv = 2) -public class GenericUDFOPNotNull extends GenericUDF { +public class GenericUDFOPNotNull extends GenericUDF implements NonNullableReturnTypeUDF { private final BooleanWritable result = new BooleanWritable(); @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java index d0c363d31dee..d75f123285db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java @@ -36,7 +36,7 @@ @Description(name = "isnull", value = "_FUNC_ a - Returns true if a is NULL and false otherwise") @VectorizedExpressions({IsNull.class, SelectColumnIsNull.class}) @NDV(maxNdv = 2) -public class GenericUDFOPNull extends GenericUDF { +public class GenericUDFOPNull extends GenericUDF implements NonNullableReturnTypeUDF { private final BooleanWritable result = new BooleanWritable(); @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NonNullableReturnTypeUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NonNullableReturnTypeUDF.java new file mode 100644 index 000000000000..04ae8a09b802 --- /dev/null +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NonNullableReturnTypeUDF.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +/** + * Marker interface (declares no methods) for UDFs that always return a non-nullable datatype + * (e.g. {@code IS NULL}, {@code IS NOT NULL}). + */ +public interface NonNullableReturnTypeUDF { +} + + diff --git a/ql/src/test/queries/clientpositive/is_null_is_not_null_nested.q b/ql/src/test/queries/clientpositive/is_null_is_not_null_nested.q new file mode 100644 index 000000000000..267b0f8134d4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/is_null_is_not_null_nested.q @@ -0,0 +1,28 @@ +-- Tests for nested IS NULL / IS NOT NULL predicates. +-- (name IS NULL) IS NULL should simplify to FALSE since IS NULL always returns a non-nullable BOOLEAN. +-- (name IS NOT NULL) IS NULL should simplify to FALSE for the same reason. 
+ +CREATE TABLE t_is_null_nested (id INT, name STRING); + +INSERT INTO t_is_null_nested VALUES + (1, 'alice'), + (2, NULL); + +-- always FALSE +SELECT id, (name IS NULL) IS NULL FROM t_is_null_nested ORDER BY id; + +-- always FALSE +SELECT id, (name IS NOT NULL) IS NULL FROM t_is_null_nested ORDER BY id; + +-- always TRUE +SELECT id, (name IS NULL) IS NOT NULL FROM t_is_null_nested ORDER BY id; + +-- always TRUE +SELECT id, (name IS NOT NULL) IS NOT NULL FROM t_is_null_nested ORDER BY id; + +-- no rows should be filtered out +SELECT id FROM t_is_null_nested WHERE (name IS NULL) IS NOT NULL ORDER BY id; + +-- should filter out all rows +SELECT id FROM t_is_null_nested WHERE (name IS NULL) IS NULL ORDER BY id; + diff --git a/ql/src/test/results/clientpositive/llap/is_null_is_not_null_nested.q.out b/ql/src/test/results/clientpositive/llap/is_null_is_not_null_nested.q.out new file mode 100644 index 000000000000..02930bfd15e5 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/is_null_is_not_null_nested.q.out @@ -0,0 +1,80 @@ +PREHOOK: query: CREATE TABLE t_is_null_nested (id INT, name STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_is_null_nested +POSTHOOK: query: CREATE TABLE t_is_null_nested (id INT, name STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_is_null_nested +PREHOOK: query: INSERT INTO t_is_null_nested VALUES + (1, 'alice'), + (2, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_is_null_nested +POSTHOOK: query: INSERT INTO t_is_null_nested VALUES + (1, 'alice'), + (2, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_is_null_nested +POSTHOOK: Lineage: t_is_null_nested.id SCRIPT [] +POSTHOOK: Lineage: t_is_null_nested.name SCRIPT [] +PREHOOK: query: SELECT id, (name IS NULL) IS NULL FROM t_is_null_nested ORDER BY id +PREHOOK: type: QUERY 
+PREHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +POSTHOOK: query: SELECT id, (name IS NULL) IS NULL FROM t_is_null_nested ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +1 false +2 false +PREHOOK: query: SELECT id, (name IS NOT NULL) IS NULL FROM t_is_null_nested ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +POSTHOOK: query: SELECT id, (name IS NOT NULL) IS NULL FROM t_is_null_nested ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +1 false +2 false +PREHOOK: query: SELECT id, (name IS NULL) IS NOT NULL FROM t_is_null_nested ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +POSTHOOK: query: SELECT id, (name IS NULL) IS NOT NULL FROM t_is_null_nested ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +1 true +2 true +PREHOOK: query: SELECT id, (name IS NOT NULL) IS NOT NULL FROM t_is_null_nested ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +POSTHOOK: query: SELECT id, (name IS NOT NULL) IS NOT NULL FROM t_is_null_nested ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +1 true +2 true +PREHOOK: query: SELECT id FROM t_is_null_nested WHERE (name IS NULL) IS NOT NULL ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +POSTHOOK: query: SELECT id FROM t_is_null_nested WHERE (name IS NULL) IS NOT NULL ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +1 +2 +PREHOOK: query: SELECT id FROM t_is_null_nested WHERE (name IS NULL) IS NULL ORDER BY id +PREHOOK: type: 
QUERY +PREHOOK: Input: default@t_is_null_nested +#### A masked pattern was here #### +POSTHOOK: query: SELECT id FROM t_is_null_nested WHERE (name IS NULL) IS NULL ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_is_null_nested +#### A masked pattern was here ####