Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5396e92
HIVE-29413: Avoid code duplication by updating getPartCols method for…
ramitg254 Apr 8, 2026
0676ae7
commit-2
ramitg254 Apr 9, 2026
b4ea184
corrected bucket-map-join test
ramitg254 Apr 9, 2026
96823d4
corrected update statements
ramitg254 Apr 10, 2026
39b48b7
corrected load, partition evolution tests
ramitg254 Apr 11, 2026
162947e
refractored
ramitg254 Apr 11, 2026
f144909
addressed sonar issues
ramitg254 Apr 12, 2026
0a704bb
updated table api and usage
ramitg254 Apr 26, 2026
c02c458
introduced index optimization
ramitg254 May 24, 2026
0feb8bb
corrected implementation
ramitg254 May 24, 2026
d3f976f
updated describe implementation and outputs
ramitg254 May 24, 2026
246cb2a
updated api and test
ramitg254 May 25, 2026
954609b
updated update implementation
ramitg254 May 25, 2026
f7544ed
updated partition pruning and query rewriting
ramitg254 May 25, 2026
6053034
changes related to metatable
ramitg254 May 25, 2026
409cbe8
corrected alter and semantic analyzer implementation
ramitg254 May 26, 2026
83940d9
updated merge implementation and test output
ramitg254 May 26, 2026
7606f66
updated ctas create and tests output
ramitg254 May 26, 2026
ebb6085
updated stats autogather and test output
ramitg254 May 26, 2026
d897942
updated getPartitionKeys
ramitg254 May 27, 2026
44b2be4
removed getStorageSchemaCols part-1
ramitg254 May 30, 2026
3e49846
removed getStorageSchemaCols part-2
ramitg254 May 31, 2026
150b8a2
removed getStorageSchemaCols part-3
ramitg254 May 31, 2026
4b03195
removed workaround
ramitg254 May 31, 2026
e2277cf
addressed sonar issues
ramitg254 Jun 1, 2026
247c205
non part cols retrieval made lazy
ramitg254 Jun 1, 2026
b17625d
reviewed required changes
ramitg254 Jun 9, 2026
6693fca
corrected partition.getCols for iceberg table
ramitg254 Jun 9, 2026
2c30769
added wrapper for lineage
ramitg254 Jun 9, 2026
ebb31ca
reverted getPartitionKeys
ramitg254 Jun 9, 2026
e30d562
refractor-1
ramitg254 Jun 10, 2026
f1d8876
refractor-2
ramitg254 Jun 10, 2026
336347e
correction for rebased iceberg view commit recently merged to master
ramitg254 Jun 11, 2026
fe7bed3
removed isTableTypeSet and merged partition column comments
ramitg254 Jun 11, 2026
7238d7c
updated conflicts for rebase
ramitg254 Jun 11, 2026
1359ac8
reverted to user comment override and refractored
ramitg254 Jun 11, 2026
2f8945d
moved helpers to MetaStoreUtils
ramitg254 Jun 12, 2026
e37919d
moved to HiveTableUtil
ramitg254 Jun 18, 2026
4df3f7c
updated method name
ramitg254 Jun 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.CreateTableRequest;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand Down Expand Up @@ -118,16 +117,6 @@ public BaseHiveIcebergMetaHook(Configuration conf) {
this.conf = conf;
}

public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable == null ||
hmsTable.getParameters() == null ||
!TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) {
return false;
}
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler);
}

@Override
public void preCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
CreateTableRequest request = new CreateTableRequest(hmsTable);
Expand All @@ -140,7 +129,7 @@ public void preCreateTable(CreateTableRequest request) {
if (hmsTable.isTemporary()) {
throw new UnsupportedOperationException("Creation of temporary iceberg tables is not supported.");
}
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
preCreateIcebergView(request);
return;
}
Expand Down Expand Up @@ -533,7 +522,7 @@ protected void setWriteModeDefaults(Table icebergTbl, Map<String, String> newPro
public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable != null) {
try {
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
IcebergViewSupport.enrichHmsTableFromIcebergView(hmsTable, conf);
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ public HiveIcebergMetaHook(Configuration conf) {

@Override
public void commitCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf);
Map<String, String> tblProps =
hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters());
Expand Down Expand Up @@ -266,7 +266,7 @@ public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable,
@Override
public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
throws MetaException {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
currentAlterTableOp = null;
if (commitLock == null) {
commitLock = new NoLock();
Expand Down Expand Up @@ -503,7 +503,7 @@ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable
if (commitLock == null) {
throw new IllegalStateException("Hive commit lock should already be set");
}
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf);
Map<String, String> tblProps =
hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ public boolean canProvidePartitionStatistics(org.apache.hadoop.hive.ql.metadata.
if (!getStatsSource().equals(HiveMetaHook.ICEBERG)) {
return false;
}
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return false;
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand Down Expand Up @@ -899,7 +899,7 @@ public boolean supportsPartitionTransform() {

@Override
public List<TransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyList();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand All @@ -916,7 +916,7 @@ public List<TransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.m
@Override
public Map<Integer, List<TransformSpec>> getPartitionTransformSpecs(
org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyMap();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand Down Expand Up @@ -1550,10 +1550,10 @@ public List<FieldSchema> acidSelectColumns(org.apache.hadoop.hive.ql.metadata.Ta
case DELETE ->
// TODO: make it configurable whether we want to include the table columns in the select query.
// It might make delete writes faster if we don't have to write out the row object
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols());
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getAllCols());
Comment thread
deniskuzZ marked this conversation as resolved.
case UPDATE -> shouldOverwrite(table, operation) ?
ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA :
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols());
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getAllCols());
case MERGE -> ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA;
default -> ImmutableList.of();
};
Expand Down Expand Up @@ -1584,7 +1584,7 @@ public boolean supportsSortColumns() {

@Override
public List<FieldSchema> sortColumns(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyList();
}
TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
Expand Down Expand Up @@ -1989,8 +1989,9 @@ public void setTableParametersForCTLT(org.apache.hadoop.hive.ql.metadata.Table t
desc.setIsExternal(true);
}

// If source is Iceberg table set the schema and the partition spec
if (MetaStoreUtils.isIcebergTable(origParams)) {
// parameter table_type is set to "ICEBERG" in case of Iceberg tables
// set the schema and the partition spec accordingly
if (HiveTableUtil.isTableTypeSet(origParams)) {
tbl.getParameters()
.put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA));
tbl.getParameters()
Expand Down Expand Up @@ -2147,13 +2148,12 @@ public List<Partition> getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm
}

public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
List<FieldSchema> partCols = hmsTable.getPartCols();
return partCols != null && !partCols.isEmpty();
}
if (!hmsTable.getTTable().isSetId()) {
if (hmsTable.getMetaTable() != null) {
return false;
}
if (!HiveTableUtil.isRegistered(hmsTable) || HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return !hmsTable.getPartitionKeys().isEmpty();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Snapshot snapshot = IcebergTableUtil.getTableSnapshot(table, hmsTable);

Expand Down Expand Up @@ -2296,13 +2296,12 @@ public boolean canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table

@Override
public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
List<FieldSchema> partCols = hmsTable.getPartCols();
return partCols != null ? partCols : Collections.emptyList();
}
if (!hmsTable.getTTable().isSetId()) {
if (hmsTable.getMetaTable() != null) {
return Collections.emptyList();
}
if (!HiveTableUtil.isRegistered(hmsTable) || HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return hmsTable.getPartitionKeys();
Comment thread
deniskuzZ marked this conversation as resolved.
}
Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
return MetastoreUtil.getPartitionKeys(icebergTable, icebergTable.spec().specId());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
Expand Down Expand Up @@ -380,4 +382,23 @@ public static boolean isCtas(Properties properties) {
return Boolean.parseBoolean(properties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS));
}

public static boolean isTableTypeSet(Map<String, String> params) {
Comment thread
deniskuzZ marked this conversation as resolved.
return params != null &&
HiveMetaHook.ICEBERG.equalsIgnoreCase(params.get(HiveMetaHook.TABLE_TYPE));
}

public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable == null ||
hmsTable.getParameters() == null ||
!TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) {
return false;
}
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler);
}

public static boolean isRegistered(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
return hmsTable.getTTable().isSetId() && isTableTypeSet(hmsTable.getParameters());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,8 @@ public void testCreateTableWithoutColumnComments() {
@Test
public void testCreatePartitionedTableWithColumnComments() {
TableIdentifier identifier = TableIdentifier.of("default", "partitioned_with_comment_table");
String[] expectedDoc = new String[] {"int column", "string column", null, "partition column", null};
String[] expectedDoc = new String[] {"int column", "string column", null, "partition column",
"Transform: identity"};
shell.executeStatement("CREATE EXTERNAL TABLE partitioned_with_comment_table (" +
"t_int INT COMMENT 'int column', " +
"t_string STRING COMMENT 'string column', " +
Expand All @@ -959,13 +960,18 @@ public void testCreatePartitionedTableWithColumnComments() {

List<Object[]> rows = shell.executeStatement("DESCRIBE default.partitioned_with_comment_table");
List<Types.NestedField> columns = icebergTable.schema().columns();
List<String> partitionColumns = List.of("t_string_3", "t_string_4");
// The partition transform information and partition information is 6 extra lines, and 4 more line for the columns
Assert.assertEquals(columns.size() + 10, rows.size());
for (int i = 0; i < columns.size(); i++) {
Types.NestedField field = columns.get(i);
Assert.assertArrayEquals(new Object[] {field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(),
field.doc() != null ? field.doc() : ""}, rows.get(i));
Assert.assertEquals(expectedDoc[i], field.doc());
String fieldDoc = field.doc();
if (fieldDoc == null && partitionColumns.contains(field.name())) {
fieldDoc = "Transform: identity";
}
Assert.assertArrayEquals(new Object[]{field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(),
fieldDoc != null ? fieldDoc : ""}, rows.get(i));
Assert.assertEquals(expectedDoc[i], fieldDoc);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -453,8 +451,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -729,8 +725,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1066,9 +1060,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1513,9 +1504,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1960,9 +1948,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -415,7 +414,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -770,7 +768,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_int
# col_name data_type comment
a int
b int

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1125,7 +1122,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_double
# col_name data_type comment
a int
b double

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1426,7 +1422,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,21 @@ Stage-0
limit:-1
Stage-1
Map 1 vectorized
File Output Operator [FS_53]
Map Join Operator [MAPJOIN_52] (rows=2 width=530)
BucketMapJoin:true,Conds:SEL_51._col1, _col2=RS_49._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
File Output Operator [FS_23]
Map Join Operator [MAPJOIN_22] (rows=2 width=530)
BucketMapJoin:true,Conds:SEL_21._col1, _col2=RS_19._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Map 2 [CUSTOM_EDGE] vectorized
MULTICAST [RS_49]
MULTICAST [RS_19]
PartitionCols:_col2, _col1
Select Operator [SEL_48] (rows=2 width=265)
Select Operator [SEL_18] (rows=2 width=265)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_47] (rows=2 width=265)
Filter Operator [FIL_17] (rows=2 width=265)
predicate:(id is not null and part is not null)
TableScan [TS_3] (rows=2 width=265)
default@tbl,tbl2,Tbl:COMPLETE,Col:COMPLETE,Output:["foid","part","id"]
<-Select Operator [SEL_51] (rows=2 width=265)
<-Select Operator [SEL_21] (rows=2 width=265)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_50] (rows=2 width=265)
Filter Operator [FIL_20] (rows=2 width=265)
predicate:(id is not null and part is not null)
TableScan [TS_0] (rows=2 width=265)
default@tbl,tbl,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:100,Grouping Partition Columns:["id","part"],Output:["foid","part","id"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ PREHOOK: Input: default@tbl_ice_puffin
POSTHOOK: query: desc formatted tbl_ice_puffin C
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice_puffin
col_name C
col_name c
data_type int
min 52
max 56
Expand All @@ -358,7 +358,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
comment
comment Transform: identity
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a)
PREHOOK: type: QUERY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,6 @@ POSTHOOK: query: describe formatted tbl_ice
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice
# col_name data_type comment
a int
b string
c int

# Partition Information
Expand Down
Loading
Loading