diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index ac81842a9..e9fcb5241 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -1,31 +1,45 @@ /** - * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ package com.linkedin.coral.hive.hive2rel; +import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.util.Util; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.linkedin.coral.common.FuzzyUnionSqlRewriter; /** * Class that implements {@link org.apache.calcite.plan.RelOptTable.ViewExpander} - * interface to support expansion of Hive Views to relational algebra. + * interface to support expansion of Hive Views to relational algebra. The + * returned {@link RelRoot} is realigned to the caller-provided + * {@link RelDataType} by name (case-insensitive) to preserve the + * {@link RelOptTable.ViewExpander} contract. */ public class HiveViewExpander implements RelOptTable.ViewExpander { + private static final Logger LOG = LoggerFactory.getLogger(HiveViewExpander.class); + private final HiveToRelConverter hiveToRelConverter; /** * Instantiates a new Hive view expander. @@ -46,6 +60,58 @@ public RelRoot expandView(RelDataType rowType, String queryString, List SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); - return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + RelRoot root = hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + return alignToRowType(root, rowType); + } + + /** + * Wrap {@code root} in a {@link LogicalProject} that re-orders its output + * fields to match {@code expected} by name (case-insensitive). No-op when the + * orderings already agree. If a name is missing from {@code root} or arities + * differ, returns {@code root} unchanged and logs a warning. + */ + @VisibleForTesting + static RelRoot alignToRowType(RelRoot root, RelDataType expected) { + final RelNode rel = root.rel; + final RelDataType actual = rel.getRowType(); + if (expected.getFieldCount() != actual.getFieldCount()) { + LOG.warn("Skipping row-type alignment: expected {} fields, expanded view produced {}. expected={}, actual={}", + expected.getFieldCount(), actual.getFieldCount(), expected, actual); + return root; + } + if (fieldNamesAlignedByOrder(expected, actual)) { + return root; + } + final List projects = new ArrayList<>(expected.getFieldCount()); + final List names = new ArrayList<>(expected.getFieldCount()); + final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); + for (RelDataTypeField expectedField : expected.getFieldList()) { + // case-insensitive name lookup, no struct-field traversal + final RelDataTypeField actualField = actual.getField(expectedField.getName(), false, false); + if (actualField == null) { + LOG.warn( + "Skipping row-type alignment: expected field '{}' is absent from expanded view. expected={}, actual={}", + expectedField.getName(), expected, actual); + return root; + } + projects.add(rexBuilder.makeInputRef(actualField.getType(), actualField.getIndex())); + names.add(expectedField.getName()); + } + final RelNode aligned = LogicalProject.create(rel, projects, names); + return RelRoot.of(aligned, root.kind); + } + + private static boolean fieldNamesAlignedByOrder(RelDataType a, RelDataType b) { + if (a.getFieldCount() != b.getFieldCount()) { + return false; + } + final List af = a.getFieldList(); + final List bf = b.getFieldList(); + for (int i = 0; i < af.size(); i++) { + if (!af.get(i).getName().equalsIgnoreCase(bf.get(i).getName())) { + return false; + } + } + return true; } } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java index ba77b3f61..67bd84563 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java @@ -13,8 +13,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -668,6 +673,40 @@ public void testIntCastToBigIntDuringComparison() { assertEquals(relToString(sql), expected); } + /** + * Regression test for the {@link org.apache.calcite.plan.RelOptTable.ViewExpander} contract: + * the {@link org.apache.calcite.rel.RelRoot} returned from {@link HiveViewExpander#expandView} + * must have a row type that matches the caller-provided {@code rowType}, even when the + * re-parsed view body would naturally produce its columns in a different order. Without the + * alignment, downstream consumers that rely on positional access (e.g. {@code SELECT *} over + * a view-on-view) would index into the wrong RelNode output column. + */ + @Test + public void testExpandViewAlignsToCallerRowType() { + // Build a caller-provided rowType whose order is the reverse of the view body's + // natural order. The view body produces [col_a INT, col_b STRING]; we ask for + // [col_b STRING, col_a INT]. + RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(); + RelDataType reversedRowType = typeFactory.builder().add("col_b", typeFactory.createSqlType(SqlTypeName.VARCHAR)) + .add("col_a", typeFactory.createSqlType(SqlTypeName.INTEGER)).build(); + + HiveViewExpander expander = new HiveViewExpander(converter); + RelRoot result = expander.expandView(reversedRowType, "ignored", ImmutableList.of("hive", "test"), + ImmutableList.of("realign_view")); + + List actualNames = result.rel.getRowType().getFieldNames(); + assertEquals(actualNames.size(), 2, "Expected 2 output columns"); + assertEquals(actualNames.get(0).toLowerCase(), "col_b", + "Position 0 must be col_b (matches caller-provided rowType), got: " + actualNames); + assertEquals(actualNames.get(1).toLowerCase(), "col_a", + "Position 1 must be col_a (matches caller-provided rowType), got: " + actualNames); + + // The types must also line up with the caller-provided rowType, not the body's natural order. + List fields = result.rel.getRowType().getFieldList(); + assertEquals(fields.get(0).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(fields.get(1).getType().getSqlTypeName(), SqlTypeName.INTEGER); + } + private String relToString(String sql) { return RelOptUtil.toString(converter.convertSql(sql)); } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java new file mode 100644 index 000000000..71679b1cf --- /dev/null +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java @@ -0,0 +1,215 @@ +/** + * Copyright 2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel; + +import java.util.List; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; + + +/** + * Unit tests for {@link HiveViewExpander#alignToRowType}. These exercise the + * alignment logic in isolation -- no Hive metastore or end-to-end view expansion + * required. + */ +public class HiveViewExpanderTest { + + private RelDataTypeFactory typeFactory; + private RelOptCluster cluster; + + @BeforeClass + public void setUp() { + typeFactory = new JavaTypeFactoryImpl(); + cluster = RelOptCluster.create(new VolcanoPlanner(), new RexBuilder(typeFactory)); + } + + private RelDataType rowType(Object... nameTypePairs) { + if (nameTypePairs.length % 2 != 0) { + throw new IllegalArgumentException("Expected (name, type) pairs"); + } + RelDataTypeFactory.Builder builder = typeFactory.builder(); + for (int i = 0; i < nameTypePairs.length; i += 2) { + builder.add((String) nameTypePairs[i], (SqlTypeName) nameTypePairs[i + 1]); + } + return builder.build(); + } + + private RelRoot rootWithRowType(RelDataType type) { + RelNode rel = LogicalValues.createEmpty(cluster, type); + return RelRoot.of(rel, SqlKind.SELECT); + } + + @Test + public void testAlignedByOrderReturnsSameRoot() { + RelDataType type = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + RelRoot root = rootWithRowType(type); + + RelRoot result = HiveViewExpander.alignToRowType(root, type); + + assertSame(result, root, "Already-aligned root should be returned unchanged"); + } + + @Test + public void testCaseDifferencesOnlyReturnsSameRoot() { + RelRoot root = rootWithRowType(rowType("colA", SqlTypeName.INTEGER, "colB", SqlTypeName.VARCHAR)); + RelDataType expected = rowType("cola", SqlTypeName.INTEGER, "colb", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertSame(result, root, "Names differing only in case should be treated as aligned (case-insensitive)"); + } + + @Test + public void testReorderedFieldsWrapInProject() { + RelRoot root = rootWithRowType(rowType("b", SqlTypeName.VARCHAR, "a", SqlTypeName.INTEGER)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertTrue(result.rel instanceof LogicalProject, + "Reordered fields should result in a LogicalProject wrapper, got " + result.rel.getClass()); + LogicalProject project = (LogicalProject) result.rel; + assertTrue(project.getInput() instanceof Values, "Wrapped Project should sit directly on the input"); + + List outFields = result.rel.getRowType().getFieldList(); + assertEquals(outFields.size(), 2); + assertEquals(outFields.get(0).getName(), "a"); + assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + assertEquals(outFields.get(1).getName(), "b"); + assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + } + + /** + * Models the production failure mode this method exists to prevent: an + * upstream view body that lists {@code idHash} (the longer name) ahead of + * its sibling {@code id} (the shorter name, a prefix of the other) + * introduces column aliases whose ordering disagrees with the downstream + * consumer's expected ordering. Without realignment, the downstream's + * positional access lands {@code id} on the {@code idHash} position and + * vice versa, silently swapping every value read from those columns. + */ + @Test + public void testPrefixSiblingsReorderedCorrectly() { + RelRoot root = rootWithRowType(rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER)); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertTrue(result.rel instanceof LogicalProject); + LogicalProject project = (LogicalProject) result.rel; + + // Output field order matches expected: id (INTEGER) first, idHash (VARCHAR) second. + List outFields = result.rel.getRowType().getFieldList(); + assertEquals(outFields.get(0).getName(), "id"); + assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + assertEquals(outFields.get(1).getName(), "idHash"); + assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + + // And the underlying input refs cross-link to the correct source positions: + // expected[0]=id -> input position 1; expected[1]=idHash -> input position 0. + assertTrue(project.getProjects().get(0) instanceof RexInputRef); + assertTrue(project.getProjects().get(1) instanceof RexInputRef); + assertEquals(((RexInputRef) project.getProjects().get(0)).getIndex(), 1); + assertEquals(((RexInputRef) project.getProjects().get(1)).getIndex(), 0); + } + + /** + * Realignment over multiple prefix-name pairs at once. Each pair in the input + * row type lists the longer name first ({@code idHash} before {@code id}, + * {@code emailVerified} before {@code email}, {@code countryCode} before + * {@code country}); the caller's expected row type lists them the other way + * around. Mirrors the realistic failure shape where an upstream view body's + * case-sensitive alphabetical sort places longer prefix-extensions ahead of + * their shorter siblings, and a downstream consumer expects the + * lowercase-alphabetical (prefix-first) order. Each pair must be realigned + * independently and correctly. + */ + @Test + public void testMultiplePrefixSiblingsAllReorderedCorrectly() { + RelRoot root = rootWithRowType( + rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER, "emailVerified", SqlTypeName.BOOLEAN, "email", + SqlTypeName.VARCHAR, "countryCode", SqlTypeName.VARCHAR, "country", SqlTypeName.VARCHAR)); + RelDataType expected = + rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR, "email", SqlTypeName.VARCHAR, "emailVerified", + SqlTypeName.BOOLEAN, "country", SqlTypeName.VARCHAR, "countryCode", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertTrue(result.rel instanceof LogicalProject); + LogicalProject project = (LogicalProject) result.rel; + + // Output names and types follow the expected (prefix-first) ordering. + List outFields = result.rel.getRowType().getFieldList(); + assertEquals(outFields.size(), 6); + assertEquals(outFields.get(0).getName(), "id"); + assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + assertEquals(outFields.get(1).getName(), "idHash"); + assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(outFields.get(2).getName(), "email"); + assertEquals(outFields.get(2).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(outFields.get(3).getName(), "emailVerified"); + assertEquals(outFields.get(3).getType().getSqlTypeName(), SqlTypeName.BOOLEAN); + assertEquals(outFields.get(4).getName(), "country"); + assertEquals(outFields.get(4).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(outFields.get(5).getName(), "countryCode"); + assertEquals(outFields.get(5).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + + // Each expected position points at the correct source position in the input row type. + // Input order was: [idHash=0, id=1, emailVerified=2, email=3, countryCode=4, country=5]. + int[] expectedSourceIndices = { 1, 0, 3, 2, 5, 4 }; + for (int i = 0; i < expectedSourceIndices.length; i++) { + assertTrue(project.getProjects().get(i) instanceof RexInputRef, "Project " + i + " should be a RexInputRef"); + assertEquals(((RexInputRef) project.getProjects().get(i)).getIndex(), expectedSourceIndices[i], + "Expected position " + i + " (" + outFields.get(i).getName() + ") should source from input position " + + expectedSourceIndices[i]); + } + } + + @Test + public void testMissingFieldReturnsOriginalRoot() { + RelRoot root = rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR)); + // `c` is not in the input -- alignment should bail rather than fabricate. + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "c", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertSame(result, root, "Missing field should trigger safe fallback to the original root"); + } + + @Test + public void testDifferentArityReturnsOriginalRoot() { + RelRoot root = + rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR, "c", SqlTypeName.DOUBLE)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertSame(result, root, "Different arity should trigger safe fallback rather than silent column drop"); + assertFalse(result.rel instanceof LogicalProject, + "Different-arity case should not wrap in a Project (would be lossy)"); + } +} diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java index d9443f44b..43f61622b 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java @@ -85,6 +85,10 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException { driver.run( "CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)"); + // Fixture for HiveViewExpander.expandView() row-type alignment regression test. + driver.run("CREATE TABLE IF NOT EXISTS test.realign_base(col_a int, col_b string)"); + driver.run("CREATE VIEW IF NOT EXISTS test.realign_view AS SELECT * FROM test.realign_base"); + driver.run("CREATE DATABASE IF NOT EXISTS fuzzy_union"); driver.run("CREATE TABLE IF NOT EXISTS fuzzy_union.tableA(a int, b struct)");