diff --git a/arrow-schema/src/extension/canonical/bool8.rs b/arrow-schema/src/extension/canonical/bool8.rs index 362a2cc018c7..c94c8217b8ff 100644 --- a/arrow-schema/src/extension/canonical/bool8.rs +++ b/arrow-schema/src/extension/canonical/bool8.rs @@ -96,7 +96,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::Int8, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())] diff --git a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs index b6bd1c1223f4..5157eefe9ebb 100644 --- a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs +++ b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs @@ -471,7 +471,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new_fixed_size_list("", Field::new("", DataType::Float32, false), 3, false) diff --git a/arrow-schema/src/extension/canonical/json.rs b/arrow-schema/src/extension/canonical/json.rs index 297a2d99aa04..d2a54b9189b7 100644 --- a/arrow-schema/src/extension/canonical/json.rs +++ b/arrow-schema/src/extension/canonical/json.rs @@ -222,7 +222,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::Int8, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "{}".to_owned())] diff --git a/arrow-schema/src/extension/canonical/opaque.rs b/arrow-schema/src/extension/canonical/opaque.rs index fceae8d3711d..acfc1331a670 100644 --- a/arrow-schema/src/extension/canonical/opaque.rs +++ b/arrow-schema/src/extension/canonical/opaque.rs @@ -285,7 +285,7 @@ mod tests { } 
#[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::Null, false).with_metadata( [( diff --git a/arrow-schema/src/extension/canonical/timestamp_with_offset.rs b/arrow-schema/src/extension/canonical/timestamp_with_offset.rs index 643025919d94..20df20bad922 100644 --- a/arrow-schema/src/extension/canonical/timestamp_with_offset.rs +++ b/arrow-schema/src/extension/canonical/timestamp_with_offset.rs @@ -300,7 +300,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = make_valid_field_primitive(TimeUnit::Second) .with_metadata([(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())].into()); diff --git a/arrow-schema/src/extension/canonical/uuid.rs b/arrow-schema/src/extension/canonical/uuid.rs index 09533564ed44..3e897f47318d 100644 --- a/arrow-schema/src/extension/canonical/uuid.rs +++ b/arrow-schema/src/extension/canonical/uuid.rs @@ -100,7 +100,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::FixedSizeBinary(16), false); field.extension_type::<Uuid>(); diff --git a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs index b5403dcf684f..fbc641f54366 100644 --- a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs +++ b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs @@ -529,7 +529,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new_struct( "", diff --git a/arrow-schema/src/extension/mod.rs 
b/arrow-schema/src/extension/mod.rs index cd17272e15ab..aed560029db8 100644 --- a/arrow-schema/src/extension/mod.rs +++ b/arrow-schema/src/extension/mod.rs @@ -23,6 +23,7 @@ mod canonical; pub use canonical::*; use crate::{ArrowError, DataType}; +use std::collections::HashMap; /// The metadata key for the string name identifying an [`ExtensionType`]. pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name"; @@ -255,4 +256,46 @@ pub trait ExtensionType: Sized { /// This should return an error if the given data type is not supported by /// this extension type. fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError>; + + /// Construct this extension type from field metadata and data type. + /// + /// This is a provided method that extracts extension type information from + /// metadata (using [`EXTENSION_TYPE_NAME_KEY`] and + /// [`EXTENSION_TYPE_METADATA_KEY`]) and delegates to [`Self::try_new`]. + /// + /// Returns an error if: + /// - The extension type name is missing or doesn't match [`Self::NAME`] + /// - Metadata deserialization fails + /// - The data type is not supported + /// + /// This method enables extension type checking without requiring a full + /// [`Field`] instance, useful when only metadata and data type are available. 
+ /// + /// [`Field`]: crate::Field + fn try_new_from_field_metadata( + data_type: &DataType, + metadata: &HashMap<String, String>, + ) -> Result<Self, ArrowError> { + // Check the extension name in the metadata + match metadata.get(EXTENSION_TYPE_NAME_KEY).map(|s| s.as_str()) { + // It should match the name of the given extension type + Some(name) if name == Self::NAME => { + // Deserialize the metadata and try to construct the extension type + let ext_metadata = metadata + .get(EXTENSION_TYPE_METADATA_KEY) + .map(|s| s.as_str()); + let parsed = Self::deserialize_metadata(ext_metadata)?; + Self::try_new(data_type, parsed) + } + // Name mismatch + Some(name) => Err(ArrowError::InvalidArgumentError(format!( + "Extension type name mismatch: expected {}, got {name}", + Self::NAME + ))), + // Name missing + None => Err(ArrowError::InvalidArgumentError( + "Extension type name missing".to_string(), + )), + } + } } diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index c4566e41bfa8..a1c509abf2e0 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -575,25 +575,7 @@ impl Field { /// } /// ``` pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> { - // Check the extension name in the metadata - match self.extension_type_name() { - // It should match the name of the given extension type - Some(name) if name == E::NAME => { - // Deserialize the metadata and try to construct the extension - // type - E::deserialize_metadata(self.extension_type_metadata()) - .and_then(|metadata| E::try_new(self.data_type(), metadata)) - } - // Name mismatch - Some(name) => Err(ArrowError::InvalidArgumentError(format!( - "Field extension type name mismatch, expected {}, found {name}", - E::NAME - ))), - // Name missing - None => Err(ArrowError::InvalidArgumentError( - "Field extension type name missing".to_owned(), - )), - } + E::try_new_from_field_metadata(self.data_type(), self.metadata()) } /// Returns an instance of the given [`ExtensionType`] of this [`Field`], diff --git 
a/parquet/src/arrow/schema/virtual_type.rs b/parquet/src/arrow/schema/virtual_type.rs index b71753f61c93..657a76b73229 100644 --- a/parquet/src/arrow/schema/virtual_type.rs +++ b/parquet/src/arrow/schema/virtual_type.rs @@ -143,7 +143,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn row_number_missing_name() { let field = Field::new("", DataType::Int64, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())] @@ -203,7 +203,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn row_group_index_missing_name() { let field = Field::new("", DataType::Int64, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]