diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 662ece1c907b..c2949454973b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2182,6 +2182,17 @@ public static enum ConfVars { "Whether to use former Java date/time APIs to convert between timezones when writing timestamps in " + "Avro files. Once data are written to the file the effect is permanent (also reflected in the metadata)." + "Changing the value of this property affects only new data written to the file."), + HIVE_AVRO_SCHEMA_URL_ALLOWED_SCHEMES("hive.avro.schema.url.allowed.schemes", + "hdfs,s3,s3a,s3n,abfs,abfss,gs,wasb,wasbs,viewfs,o3fs,ofs", + "Comma-separated list of URI schemes permitted for avro.schema.url when loading schemas from a remote " + + "location. HTTP/HTTPS and file:// are never allowed via this setting. A URI with no scheme is " + + "resolved against the default filesystem."), + HIVE_AVRO_SCHEMA_URL_REMOTE_HTTP_ENABLED("hive.avro.schema.url.remote.http.enabled", false, + "Whether to allow avro.schema.url values that use http or https. When enabled, the host must also appear " + + "in hive.avro.schema.url.http.allowed.hosts. Disabled by default to prevent server-side request forgery."), + HIVE_AVRO_SCHEMA_URL_HTTP_ALLOWED_HOSTS("hive.avro.schema.url.http.allowed.hosts", "", + "Comma-separated list of hosts permitted for avro.schema.url when hive.avro.schema.url.remote.http.enabled " + + "is true. HTTP/HTTPS schema fetch is rejected when this list is empty."), HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS("hive.int.timestamp.conversion.in.seconds", false, "Boolean/tinyint/smallint/int/bigint value is interpreted as milliseconds during the timestamp conversion.\n" + "Set this flag to true to interpret the value as seconds to be consistent with float/double." ), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index fa45fbbef703..0a33a31a14b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.util.NullOrdering; @@ -1002,6 +1003,11 @@ public static ReadEntity addInput(Set inputs, ReadEntity newInput, b assert false; } else { inputs.add(newInput); + try { + AuthorizationUtils.addAvroSchemaUrlInputForReadEntity(inputs, newInput); + } catch (SemanticException e) { + throw new RuntimeException("Failed to authorize avro.schema.url for " + newInput.getName(), e); + } return newInput; } // make compile happy diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java index fe0d2de242ad..7bf207b56574 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java @@ -17,11 +17,18 @@ */ package org.apache.hadoop.hive.ql.security.authorization; +import java.net.URI; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; import org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; @@ -34,9 +41,13 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.Entity.Type; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationTranslator; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal.HivePrincipalType; @@ -46,6 +57,9 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivObjectActionType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; /** * Utility code shared by hive internal code and sql standard authorization plugin implementation @@ -307,4 +321,83 @@ public static HivePrivObjectActionType getActionType(Entity privObject) { return actionType; } + /** + * When a table or partition is read, add a DFS ReadEntity for its avro.schema.url if the + * schema would be fetched from a filesystem location at query time. + */ + public static void addAvroSchemaUrlInputForReadEntity(Collection inputs, + ReadEntity readEntity) throws SemanticException { + if (readEntity == null || !readEntity.isDirect() || readEntity.isUpdateOrDelete()) { + return; + } + switch (readEntity.getTyp()) { + case TABLE: + if (readEntity.getTable() != null) { + addAvroSchemaUrlInputIfNeeded(inputs, readEntity.getTable()); + } + break; + case PARTITION: + case DUMMYPARTITION: + if (readEntity.getPartition() != null) { + addAvroSchemaUrlInputIfNeeded(inputs, readEntity.getPartition().getTable()); + } + break; + default: + break; + } + } + + public static void addAvroSchemaUrlInputIfNeeded(Collection inputs, Table table) + throws SemanticException { + String schemaUrl = getFilesystemAvroSchemaUrlToAuthorize(table); + if (schemaUrl == null) { + return; + } + ReadEntity schemaUrlInput = toAvroSchemaUrlReadEntity(schemaUrl); + if (inputs instanceof Set) { + PlanUtils.addInput((Set) inputs, schemaUrlInput); + } else if (!inputs.contains(schemaUrlInput)) { + inputs.add(schemaUrlInput); + } + } + + private static ReadEntity toAvroSchemaUrlReadEntity(String schemaUrl) { + Path path = new Path(schemaUrl); + return new ReadEntity(path, isLocalFilesystemSchemaUrl(schemaUrl)); + } + + private static boolean isLocalFilesystemSchemaUrl(String schemaUrl) { + try { + String scheme = new URI(schemaUrl).getScheme(); + return scheme != null && "file".equalsIgnoreCase(scheme); + } catch (URISyntaxException e) { + return false; + } + } + + /** + * Returns the avro.schema.url when it refers to a filesystem location that will be read to + * resolve the schema, or null when no schema-url authorization is required. + */ + public static String getFilesystemAvroSchemaUrlToAuthorize(Table table) { + if (table == null || table.isTemporary()) { + return null; + } + if (!AvroSerDe.class.getName().equals(table.getSerializationLib())) { + return null; + } + String literal = table.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()); + if (StringUtils.isNotEmpty(literal) && !AvroSerdeUtils.SCHEMA_NONE.equals(literal)) { + return null; + } + String schemaUrl = table.getProperty(AvroTableProperties.SCHEMA_URL.getPropName()); + if (StringUtils.isEmpty(schemaUrl) || AvroSerdeUtils.SCHEMA_NONE.equals(schemaUrl)) { + return null; + } + if (!AvroSerdeUtils.isFilesystemSchemaUrl(schemaUrl)) { + return null; + } + return schemaUrl; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java index 665a23a1fde7..40f41c0e3636 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; @@ -72,6 +73,7 @@ static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, SessionS List inputList = new ArrayList(inputs); List outputList = new ArrayList(outputs); addPermanentFunctionEntities(ss, inputList); + enrichAvroSchemaUrlInputs(inputList); List inputsHObjs = getHivePrivObjects(inputList, selectTab2Cols, hiveOpType, sem); List outputHObjs = getHivePrivObjects(outputList, updateTab2Cols, hiveOpType, sem); @@ -84,6 +86,17 @@ static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, SessionS ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build()); } + private static void enrichAvroSchemaUrlInputs(List inputList) throws HiveException { + List snapshot = new ArrayList<>(inputList); + for (ReadEntity readEntity : snapshot) { + try { + AuthorizationUtils.addAvroSchemaUrlInputForReadEntity(inputList, readEntity); + } catch (SemanticException e) { + throw new HiveException("Failed to authorize avro.schema.url for " + readEntity.getName(), e); + } + } + } + private static void addPermanentFunctionEntities(SessionState ss, List inputList) throws HiveException { for (Entry function : ss.getCurrentFunctionsInUse().entrySet()) { if (function.getValue().getFunctionType() != FunctionType.PERSISTENT) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/AlterTableEvent.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/AlterTableEvent.java index 78793b74325a..5c0e0c4ba8e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/AlterTableEvent.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/AlterTableEvent.java @@ -32,6 +32,9 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthzInfo; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +81,8 @@ private List getInputHObjs() { ret.add(getHivePrivilegeObject(oldTable)); + addAvroSchemaUrlInputAuth(ret, event); + COMMAND_STR = buildCommandString(COMMAND_STR, oldTable); LOG.debug("<== AlterTableEvent.getInputHObjs(): ret={}", ret); @@ -122,6 +127,27 @@ private List getOutputHObjs() { return ret; } + private void addAvroSchemaUrlInputAuth(List ret, PreAlterTableEvent event) { + Table newTable = event.getNewTable(); + Table oldTable = event.getOldTable(); + if (!AvroSerDe.class.getName().equals(newTable.getSd().getSerdeInfo().getSerializationLib())) { + return; + } + String newSchemaUrl = newTable.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()); + if (StringUtils.isEmpty(newSchemaUrl) || AvroSerdeUtils.SCHEMA_NONE.equals(newSchemaUrl)) { + return; + } + String oldSchemaUrl = oldTable == null ? null + : oldTable.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()); + if (StringUtils.equals(oldSchemaUrl, newSchemaUrl)) { + return; + } + if (!AvroSerdeUtils.isFilesystemSchemaUrl(newSchemaUrl)) { + return; + } + ret.add(getHivePrivilegeObjectDfsUri(newSchemaUrl)); + } + private String buildCommandString(String cmdStr, Table tbl) { String ret = cmdStr; if (tbl != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/CreateTableEvent.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/CreateTableEvent.java index 69017f9d4571..e17ad9c498a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/CreateTableEvent.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/events/CreateTableEvent.java @@ -31,6 +31,9 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizableEvent; import org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthzInfo; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,18 +68,35 @@ private List getInputHObjs() { Database database = event.getDatabase(); String uri = getSdLocation(table.getSd()); - if (StringUtils.isEmpty(uri)) { - return ret; + if (StringUtils.isNotEmpty(uri)) { + // Skip DFS_URI only if table location is under default db path + if (this.needDFSUriAuth(uri, this.getDefaultTablePath(database, table))) { + ret.add(new HivePrivilegeObject(HivePrivilegeObjectType.DFS_URI, uri)); + } } - // Skip DFS_URI only if table location is under default db path - if (this.needDFSUriAuth(uri, this.getDefaultTablePath(database, table))) { - ret.add(new HivePrivilegeObject(HivePrivilegeObjectType.DFS_URI, uri)); - } + addAvroSchemaUrlInputAuth(ret, table, database); return ret; } + private void addAvroSchemaUrlInputAuth(List ret, Table table, Database database) { + if (!AvroSerDe.class.getName().equals(table.getSd().getSerdeInfo().getSerializationLib())) { + return; + } + String schemaUrl = table.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()); + if (StringUtils.isEmpty(schemaUrl) || AvroSerdeUtils.SCHEMA_NONE.equals(schemaUrl)) { + return; + } + if (!AvroSerdeUtils.isFilesystemSchemaUrl(schemaUrl)) { + return; + } + if (!needDFSUriAuth(schemaUrl, getDefaultTablePath(database, table))) { + return; + } + ret.add(getHivePrivilegeObjectDfsUri(schemaUrl)); + } + private List getOutputHObjs() { LOG.debug("==> CreateTableEvent.getOutputHObjs()"); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/TestAvroSchemaUrlAuthorizationUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/TestAvroSchemaUrlAuthorizationUtils.java new file mode 100644 index 000000000000..5a3176f4b269 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/TestAvroSchemaUrlAuthorizationUtils.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +public class TestAvroSchemaUrlAuthorizationUtils { + + @Test + public void getFilesystemSchemaUrlToAuthorizeRequiresAvroSerde() { + org.apache.hadoop.hive.metastore.api.Table table = avroTable("hdfs://nn/schema.avsc", null); + table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"); + assertNull(AuthorizationUtils.getFilesystemAvroSchemaUrlToAuthorize(new Table(table))); + } + + @Test + public void getFilesystemSchemaUrlToAuthorizeSkipsWhenLiteralPresent() { + org.apache.hadoop.hive.metastore.api.Table table = avroTable("hdfs://nn/schema.avsc", + "{\"type\":\"record\",\"name\":\"r\",\"fields\":[]}"); + assertNull(AuthorizationUtils.getFilesystemAvroSchemaUrlToAuthorize(new Table(table))); + } + + @Test + public void getFilesystemSchemaUrlToAuthorizeReturnsHdfsUrl() { + String schemaUrl = "hdfs://nn/schema.avsc"; + org.apache.hadoop.hive.metastore.api.Table table = avroTable(schemaUrl, null); + assertEquals(schemaUrl, + AuthorizationUtils.getFilesystemAvroSchemaUrlToAuthorize(new Table(table))); + } + + @Test + public void getFilesystemSchemaUrlToAuthorizeRejectsHttpUrl() { + org.apache.hadoop.hive.metastore.api.Table table = avroTable("http://example.com/schema.avsc", null); + assertNull(AuthorizationUtils.getFilesystemAvroSchemaUrlToAuthorize(new Table(table))); + } + + @Test + public void addSchemaUrlInputForReadEntityAddsDfsReadEntity() throws SemanticException { + String schemaUrl = "hdfs://nn/schema.avsc"; + Table table = new Table(avroTable(schemaUrl, null)); + Set inputs = new LinkedHashSet<>(); + ReadEntity tableInput = new ReadEntity(table); + inputs.add(tableInput); + + AuthorizationUtils.addAvroSchemaUrlInputForReadEntity(inputs, tableInput); + + assertEquals(2, inputs.size()); + } + + @Test + public void addSchemaUrlInputForReadEntitySkipsIndirectReads() throws SemanticException { + Table table = new Table(avroTable("hdfs://nn/schema.avsc", null)); + Set inputs = new LinkedHashSet<>(); + ReadEntity tableInput = new ReadEntity(table, null, false); + inputs.add(tableInput); + + AuthorizationUtils.addAvroSchemaUrlInputForReadEntity(inputs, tableInput); + + assertEquals(1, inputs.size()); + } + + @Test + public void addSchemaUrlInputForReadEntityAddsDfsDirEntity() throws SemanticException { + String schemaUrl = "hdfs://nn/schema.avsc"; + Table table = new Table(avroTable(schemaUrl, null)); + Set inputs = new LinkedHashSet<>(); + ReadEntity tableInput = new ReadEntity(table); + inputs.add(tableInput); + + AuthorizationUtils.addAvroSchemaUrlInputForReadEntity(inputs, tableInput); + + boolean foundDfsInput = false; + for (ReadEntity input : inputs) { + if (input.getTyp() == Entity.Type.DFS_DIR) { + foundDfsInput = true; + assertTrue(input.getD().toString().contains("hdfs://nn/schema.avsc")); + } + } + assertTrue(foundDfsInput); + } + + private static org.apache.hadoop.hive.metastore.api.Table avroTable(String schemaUrl, + String schemaLiteral) { + org.apache.hadoop.hive.metastore.api.Table table = new org.apache.hadoop.hive.metastore.api.Table(); + table.setDbName("default"); + table.setTableName("avro_tbl"); + Map params = new HashMap<>(); + if (schemaUrl != null) { + params.put(AvroTableProperties.SCHEMA_URL.getPropName(), schemaUrl); + } + if (schemaLiteral != null) { + params.put(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schemaLiteral); + } + table.setParameters(params); + + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(new ArrayList()); + SerDeInfo serdeInfo = new SerDeInfo(); + serdeInfo.setSerializationLib(AvroSerDe.class.getName()); + sd.setSerdeInfo(serdeInfo); + table.setSd(sd); + return table; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java index ced07a105841..b539087d5134 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java @@ -46,6 +46,9 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.filtercontext.TableFilterContext; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TException; import org.junit.Before; @@ -68,6 +71,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.verify; @@ -187,11 +191,13 @@ private Pair, List> getHivePrivil ArgumentCaptor> outputsCapturer = ArgumentCaptor .forClass(class_listPrivObjects); - verify(mockHiveAuthorizer).checkPrivileges(any(HiveOperationType.class), + verify(mockHiveAuthorizer, atLeastOnce()).checkPrivileges(any(HiveOperationType.class), inputsCapturer.capture(), outputsCapturer.capture(), any(HiveAuthzContext.class)); - return new ImmutablePair<>(inputsCapturer.getValue(), outputsCapturer.getValue()); + int lastIdx = inputsCapturer.getAllValues().size() - 1; + return new ImmutablePair<>(inputsCapturer.getAllValues().get(lastIdx), + outputsCapturer.getAllValues().get(lastIdx)); } @Test @@ -842,6 +848,74 @@ public void testUnAuthorizedCause() { } } + @Test + public void testW_CreateAvroTable_SchemaUrlDfsUriPrivObject() throws Exception { + UserGroupInformation.setLoginUser(UserGroupInformation.createRemoteUser(authorizedUser)); + String schemaUrl = "hdfs://namenode:9000/path/to/schema.avsc"; + try { + Table table = new TableBuilder() + .setTableName("avro_tbl") + .addCol("id", ColumnType.INT_TYPE_NAME) + .setSerdeLib(AvroSerDe.class.getName()) + .addTableParam(AvroTableProperties.SCHEMA_URL.getPropName(), schemaUrl) + .setOwner(authorizedUser) + .build(conf); + hmsHandler.create_table(table); + + Pair, List> io = getHivePrivilegeObjectsFromLastCall(); + List inputs = io.getLeft(); + List dfsUriInputs = inputs.stream() + .filter(o -> o.getType() == HivePrivilegeObject.HivePrivilegeObjectType.DFS_URI) + .collect(Collectors.toList()); + + assertTrue("Should authorize read access to avro.schema.url", + dfsUriInputs.stream().anyMatch(o -> schemaUrl.equals(o.getObjectName()))); + } finally { + try { + hmsHandler.drop_table("default", "avro_tbl", true); + } catch (Exception e) { + // Ignore cleanup errors + } + } + } + + @Test + public void testX_AlterAvroTable_SchemaUrlDfsUriPrivObject() throws Exception { + UserGroupInformation.setLoginUser(UserGroupInformation.createRemoteUser(authorizedUser)); + String oldSchemaUrl = "hdfs://namenode:9000/path/to/old_schema.avsc"; + String newSchemaUrl = "hdfs://namenode:9000/path/to/new_schema.avsc"; + try { + Table table = new TableBuilder() + .setTableName("avro_tbl") + .addCol("id", ColumnType.INT_TYPE_NAME) + .setSerdeLib(AvroSerDe.class.getName()) + .addTableParam(AvroTableProperties.SCHEMA_URL.getPropName(), oldSchemaUrl) + .setOwner(authorizedUser) + .build(conf); + hmsHandler.create_table(table); + GetTableRequest request = new GetTableRequest("default", "avro_tbl"); + request.setCatName("hive"); + Table alteredTable = hmsHandler.get_table_core(request); + alteredTable.getParameters().put(AvroTableProperties.SCHEMA_URL.getPropName(), newSchemaUrl); + hmsHandler.alter_table("default", "avro_tbl", alteredTable); + + Pair, List> io = getHivePrivilegeObjectsFromLastCall(); + List inputs = io.getLeft(); + List dfsUriInputs = inputs.stream() + .filter(o -> o.getType() == HivePrivilegeObject.HivePrivilegeObjectType.DFS_URI) + .collect(Collectors.toList()); + + assertTrue("Should authorize read access to updated avro.schema.url", + dfsUriInputs.stream().anyMatch(o -> newSchemaUrl.equals(o.getObjectName()))); + } finally { + try { + hmsHandler.drop_table("default", "avro_tbl", true); + } catch (Exception e) { + // Ignore cleanup errors + } + } + } + @Test public void testDropTableNoTablePathWritePermissionShouldFail() throws Exception { UserGroupInformation.setLoginUser( diff --git a/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out b/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out index b083819fddc9..cf5e3e0540bb 100644 --- a/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out @@ -88,11 +88,13 @@ PREHOOK: query: insert overwrite table avro_extschema_insert2 partition (p1) sel PREHOOK: type: QUERY PREHOOK: Input: default@avro_extschema_insert1 PREHOOK: Input: default@avro_extschema_insert1@p1=part1 +#### A masked pattern was here #### PREHOOK: Output: default@avro_extschema_insert2 POSTHOOK: query: insert overwrite table avro_extschema_insert2 partition (p1) select * from avro_extschema_insert1 POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_extschema_insert1 POSTHOOK: Input: default@avro_extschema_insert1@p1=part1 +#### A masked pattern was here #### POSTHOOK: Output: default@avro_extschema_insert2 POSTHOOK: Output: default@avro_extschema_insert2@p1=part1 POSTHOOK: Lineage: avro_extschema_insert2 PARTITION(p1=part1).col1 SIMPLE [(avro_extschema_insert1)avro_extschema_insert1.FieldSchema(name:col1, type:string, comment:), ] diff --git a/ql/src/test/results/clientpositive/llap/compustat_avro.q.out b/ql/src/test/results/clientpositive/llap/compustat_avro.q.out index 312115e03af5..45e81722d057 100644 --- a/ql/src/test/results/clientpositive/llap/compustat_avro.q.out +++ b/ql/src/test/results/clientpositive/llap/compustat_avro.q.out @@ -48,11 +48,13 @@ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"tru PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3 PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@testavro +#### A masked pattern was here #### PREHOOK: Output: default@testavro #### A masked pattern was here #### POSTHOOK: query: analyze table testAvro compute statistics for columns col1,col3 POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@testavro +#### A masked pattern was here #### POSTHOOK: Output: default@testavro #### A masked pattern was here #### PREHOOK: query: describe formatted testAvro col1 diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index e468c58a6d59..4ddf16af8a8f 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -43,10 +43,13 @@ import java.nio.Buffer; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.ArrayList; +import java.util.Locale; import java.util.Map; import java.util.Properties; +import java.util.Set; /** * Utilities useful only to the AvroSerde itself. Not mean to be used by @@ -97,7 +100,8 @@ public String getPropName(){ + AvroTableProperties.SCHEMA_LITERAL.getPropName() + " nor " + AvroTableProperties.SCHEMA_URL.getPropName() + " specified, can't determine table schema"; - + private static final Set HTTP_SCHEMES = + new HashSet<>(Arrays.asList("http", "https")); /** * Determine the schema to that's been provided for Avro serde work. @@ -137,12 +141,24 @@ public static Schema determineSchemaOrThrowException(Configuration conf, Propert throw new AvroSerdeException(EXCEPTION_MESSAGE); } + validateSchemaUrl(schemaString, conf); + try { - Schema s = getSchemaFromFS(schemaString, conf); - if (s == null) { - //in case schema is not a file system - return AvroSerdeUtils.getSchemaFor(new URL(schemaString)); + Schema s; + if (requiresHttpFetch(schemaString)) { + URL url = new URL(schemaString); + try (InputStream in = url.openStream()) { + s = getSchemaParser().parse(in); + } + } else { + s = getSchemaFromFS(schemaString, conf); + if (s == null) { + throw new AvroSerdeException("Unable to read Avro schema from: " + schemaString + + ". avro.schema.url must refer to a Hadoop FileSystem URI (e.g. hdfs://, s3a://). " + + "Consider using avro.schema.literal instead."); + } } + materializeResolvedSchema(properties, conf, s); return s; } catch (IOException ioe) { throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe); @@ -151,6 +167,118 @@ public static Schema determineSchemaOrThrowException(Configuration conf, Propert } } + /** + * Validate that the avro.schema.url uses a permitted scheme and host. + */ + static void validateSchemaUrl(String schemaUrl, Configuration conf) throws AvroSerdeException { + final URI uri; + try { + uri = new URI(schemaUrl); + } catch (URISyntaxException e) { + throw new AvroSerdeException("Invalid avro.schema.url: " + schemaUrl, e); + } + + final String scheme = uri.getScheme(); + if (scheme == null || scheme.isEmpty()) { + return; + } + + final String schemeLower = scheme.toLowerCase(Locale.ROOT); + if (HTTP_SCHEMES.contains(schemeLower)) { + if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_REMOTE_HTTP_ENABLED)) { + throw new AvroSerdeException("avro.schema.url scheme '" + scheme + + "' is not permitted. Remote HTTP schema fetch is disabled. Set " + + HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_REMOTE_HTTP_ENABLED.varname + + " to true and configure " + + HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_HTTP_ALLOWED_HOSTS.varname + + ", or use avro.schema.literal instead."); + } + final String host = uri.getHost(); + if (host == null || host.isEmpty()) { + throw new AvroSerdeException("avro.schema.url must specify a host for HTTP/HTTPS schemas."); + } + if (!isHttpHostAllowed(host, conf)) { + throw new AvroSerdeException("avro.schema.url host '" + host + + "' is not in the permitted host list configured by " + + HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_HTTP_ALLOWED_HOSTS.varname + "."); + } + return; + } + + if (!getAllowedFilesystemSchemes(conf).contains(schemeLower)) { + throw new AvroSerdeException("avro.schema.url scheme '" + scheme + + "' is not permitted. Use a Hadoop FileSystem URI (e.g. hdfs://, s3a://) or " + + "avro.schema.literal instead."); + } + } + + /** + * Returns true if the schema URL refers to a Hadoop filesystem location (including scheme-less URIs). + */ + public static boolean isFilesystemSchemaUrl(String schemaUrl) { + if (schemaUrl == null || schemaUrl.isEmpty() || SCHEMA_NONE.equals(schemaUrl)) { + return false; + } + try { + URI uri = new URI(schemaUrl); + String scheme = uri.getScheme(); + if (scheme == null || scheme.isEmpty()) { + return true; + } + String schemeLower = scheme.toLowerCase(Locale.ROOT); + return !HTTP_SCHEMES.contains(schemeLower) + && getAllowedFilesystemSchemes(null).contains(schemeLower); + } catch (URISyntaxException e) { + return false; + } + } + + private static boolean requiresHttpFetch(String schemaUrl) throws AvroSerdeException { + try { + URI uri = new URI(schemaUrl); + String scheme = uri.getScheme(); + return scheme != null && HTTP_SCHEMES.contains(scheme.toLowerCase(Locale.ROOT)); + } catch (URISyntaxException e) { + throw new AvroSerdeException("Invalid avro.schema.url: " + schemaUrl, e); + } + } + + private static Set getAllowedFilesystemSchemes(Configuration conf) { + String schemes = conf == null + ? HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_ALLOWED_SCHEMES.getDefaultValue() + : HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_ALLOWED_SCHEMES); + Set allowed = new HashSet<>(); + for (String scheme : schemes.split(",")) { + String trimmed = scheme.trim().toLowerCase(Locale.ROOT); + if (!trimmed.isEmpty()) { + allowed.add(trimmed); + } + } + return allowed; + } + + private static boolean isHttpHostAllowed(String host, Configuration conf) { + String allowedHosts = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_HTTP_ALLOWED_HOSTS); + if (allowedHosts == null || allowedHosts.trim().isEmpty()) { + return false; + } + String hostLower = host.toLowerCase(Locale.ROOT); + for (String allowedHost : allowedHosts.split(",")) { + String trimmed = allowedHost.trim().toLowerCase(Locale.ROOT); + if (!trimmed.isEmpty() && trimmed.equals(hostLower)) { + return true; + } + } + return false; + } + + private static void materializeResolvedSchema(Properties properties, Configuration conf, Schema schema) { + properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString()); + if (conf != null) { + conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false)); + } + } + // Protected for testing and so we can pass in a conf for testing. protected static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf) throws IOException, URISyntaxException { diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java index 3ef802d605db..a12bc17e1adf 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.conf.HiveConf; import org.junit.Test; import java.io.IOException; @@ -38,6 +39,7 @@ import static org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.isNullableType; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class TestAvroSerdeUtils { @@ -151,19 +153,103 @@ public void determineSchemaFindsLiterals() throws Exception { } @Test - public void determineSchemaTriesToOpenUrl() throws AvroSerdeException, IOException { + public void determineSchemaRejectsUnknownSchemeUrl() throws AvroSerdeException, IOException { Configuration conf = new Configuration(); Properties props = new Properties(); props.put(AvroTableProperties.SCHEMA_URL.getPropName(), "not:///a.real.url"); try { AvroSerdeUtils.determineSchemaOrThrowException(conf, props); - fail("Should have tried to open that URL"); + fail("Should have rejected unknown scheme URL"); } catch (AvroSerdeException e) { - assertEquals("Unable to read schema from given path: not:///a.real.url", e.getMessage()); + assertTrue(e.getMessage().contains("not permitted")); } } + @Test + public void determineSchemaRejectsHttpByDefault() throws IOException { + Configuration conf = new Configuration(); + Properties props = new Properties(); + props.put(AvroTableProperties.SCHEMA_URL.getPropName(), + "http://169.254.169.254/latest/meta-data/"); + + try { + determineSchemaOrThrowException(conf, props); + fail("Should have rejected HTTP schema URL"); + } catch (AvroSerdeException e) { + assertTrue(e.getMessage().contains("not permitted")); + } + } + + @Test + public void determineSchemaRejectsHttpsByDefault() throws IOException { + Configuration conf = new Configuration(); + Properties props = new Properties(); + props.put(AvroTableProperties.SCHEMA_URL.getPropName(), "https://internal-service/admin"); + + try { + determineSchemaOrThrowException(conf, props); + fail("Should have rejected HTTPS schema URL"); + } catch (AvroSerdeException e) { + assertTrue(e.getMessage().contains("not permitted")); + } + } + + @Test + public void determineSchemaRejectsFileUrl() throws IOException { + Configuration conf = new Configuration(); + Properties props = new Properties(); + props.put(AvroTableProperties.SCHEMA_URL.getPropName(), "file:///etc/passwd"); + + try { + determineSchemaOrThrowException(conf, props); + fail("Should have rejected file:// schema URL"); + } catch (AvroSerdeException e) { + assertTrue(e.getMessage().contains("not permitted")); + } + } + + @Test + public void determineSchemaRejectsHttpWhenHostNotAllowlisted() throws IOException { + HiveConf conf = new HiveConf(); + conf.setBoolVar(HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_REMOTE_HTTP_ENABLED, true); + conf.setVar(HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_HTTP_ALLOWED_HOSTS, "schema.example.com"); + Properties props = new Properties(); + props.put(AvroTableProperties.SCHEMA_URL.getPropName(), + "http://169.254.169.254/latest/meta-data/"); + + try { + determineSchemaOrThrowException(conf, props); + fail("Should have rejected HTTP host not in allowlist"); + } catch (AvroSerdeException e) { + assertTrue(e.getMessage().contains("not in the permitted host list")); + } + } + + @Test + public void determineSchemaAllowsHttpWhenHostAllowlisted() throws IOException { + HiveConf conf = new HiveConf(); + conf.setBoolVar(HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_REMOTE_HTTP_ENABLED, true); + conf.setVar(HiveConf.ConfVars.HIVE_AVRO_SCHEMA_URL_HTTP_ALLOWED_HOSTS, "schema.example.com"); + Properties props = new Properties(); + props.put(AvroTableProperties.SCHEMA_URL.getPropName(), "http://schema.example.com/schema.avsc"); + + try { + determineSchemaOrThrowException(conf, props); + fail("Expected fetch failure after HTTP validation passed"); + } catch (AvroSerdeException e) { + assertTrue(e.getMessage().contains("Unable to read schema from given path")); + } + } + + @Test + public void isFilesystemSchemaUrlIdentifiesFilesystemUrls() { + assertTrue(AvroSerdeUtils.isFilesystemSchemaUrl("hdfs://nn/schema.avsc")); + assertTrue(AvroSerdeUtils.isFilesystemSchemaUrl("/tmp/schema.avsc")); + assertTrue(!AvroSerdeUtils.isFilesystemSchemaUrl("http://example.com/schema.avsc")); + assertTrue(!AvroSerdeUtils.isFilesystemSchemaUrl("file:///etc/passwd")); + } + @Test public void noneOptionWorksForSpecifyingSchemas() throws IOException, AvroSerdeException { Configuration conf = new Configuration(); @@ -195,9 +281,9 @@ public void noneOptionWorksForSpecifyingSchemas() throws IOException, AvroSerdeE props.put(AvroTableProperties.SCHEMA_URL.getPropName(), "not:///a.real.url"); try { determineSchemaOrThrowException(conf, props); - fail("Should have tried to open that bogus URL"); + fail("Should have rejected unknown scheme URL"); } catch (AvroSerdeException e) { - assertEquals("Unable to read schema from given path: not:///a.real.url", e.getMessage()); + assertTrue(e.getMessage().contains("not permitted")); } } @@ -221,6 +307,12 @@ public void determineSchemaCanReadSchemaFromHDFS() throws IOException, AvroSerde AvroSerdeUtils.getSchemaFromFS(onHDFS, miniDfs.getFileSystem().getConf()); Schema expectedSchema = AvroSerdeUtils.getSchemaFor(schemaString); assertEquals(expectedSchema, schemaFromHDFS); + + Properties props = new Properties(); + props.put(AvroTableProperties.SCHEMA_URL.getPropName(), onHDFS); + Schema resolved = determineSchemaOrThrowException(miniDfs.getFileSystem().getConf(), props); + assertEquals(expectedSchema, resolved); + assertNotNull(props.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName())); } finally { if(miniDfs != null) miniDfs.shutdown(); }