From b86ca3b0c85e69b5cba2debc4b4acaa3a3861b82 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Wed, 21 Jan 2026 13:31:54 -0800 Subject: [PATCH 1/3] move extension type construction logic out of Field --- arrow-schema/src/extension/canonical/bool8.rs | 2 +- .../extension/canonical/fixed_shape_tensor.rs | 2 +- arrow-schema/src/extension/canonical/json.rs | 2 +- .../src/extension/canonical/opaque.rs | 2 +- arrow-schema/src/extension/canonical/uuid.rs | 2 +- .../canonical/variable_shape_tensor.rs | 2 +- arrow-schema/src/extension/mod.rs | 38 +++++++++++++++++++ arrow-schema/src/field.rs | 20 +--------- 8 files changed, 45 insertions(+), 25 deletions(-) diff --git a/arrow-schema/src/extension/canonical/bool8.rs b/arrow-schema/src/extension/canonical/bool8.rs index 362a2cc018c7..c94c8217b8ff 100644 --- a/arrow-schema/src/extension/canonical/bool8.rs +++ b/arrow-schema/src/extension/canonical/bool8.rs @@ -96,7 +96,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::Int8, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())] diff --git a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs index b6bd1c1223f4..5157eefe9ebb 100644 --- a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs +++ b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs @@ -471,7 +471,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new_fixed_size_list("", Field::new("", DataType::Float32, false), 3, false) diff --git a/arrow-schema/src/extension/canonical/json.rs b/arrow-schema/src/extension/canonical/json.rs index 297a2d99aa04..d2a54b9189b7 100644 --- a/arrow-schema/src/extension/canonical/json.rs +++ b/arrow-schema/src/extension/canonical/json.rs @@ -222,7 +222,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::Int8, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "{}".to_owned())] diff --git a/arrow-schema/src/extension/canonical/opaque.rs b/arrow-schema/src/extension/canonical/opaque.rs index fceae8d3711d..acfc1331a670 100644 --- a/arrow-schema/src/extension/canonical/opaque.rs +++ b/arrow-schema/src/extension/canonical/opaque.rs @@ -285,7 +285,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::Null, false).with_metadata( [( diff --git a/arrow-schema/src/extension/canonical/uuid.rs b/arrow-schema/src/extension/canonical/uuid.rs index 09533564ed44..3e897f47318d 100644 --- a/arrow-schema/src/extension/canonical/uuid.rs +++ b/arrow-schema/src/extension/canonical/uuid.rs @@ -100,7 +100,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new("", DataType::FixedSizeBinary(16), false); field.extension_type::(); diff --git a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs index b5403dcf684f..fbc641f54366 100644 --- a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs +++ b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs @@ -529,7 +529,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = Field::new_struct( "", diff --git a/arrow-schema/src/extension/mod.rs b/arrow-schema/src/extension/mod.rs index cd17272e15ab..b356d0b61422 100644 --- a/arrow-schema/src/extension/mod.rs +++ b/arrow-schema/src/extension/mod.rs @@ -23,6 +23,7 @@ mod canonical; pub use canonical::*; use crate::{ArrowError, DataType}; +use std::collections::HashMap; /// The metadata key for the string name identifying an [`ExtensionType`]. pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name"; @@ -255,4 +256,41 @@ pub trait ExtensionType: Sized { /// This should return an error if the given data type is not supported by /// this extension type. fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result; + + /// Construct this extension type from field metadata and data type. + /// + /// This is a provided method that extracts extension type information from + /// metadata (using [`EXTENSION_TYPE_NAME_KEY`] and + /// [`EXTENSION_TYPE_METADATA_KEY`]) and delegates to [`Self::try_new`]. + /// + /// Returns an error if: + /// - The extension type name is missing or doesn't match [`Self::NAME`] + /// - Metadata deserialization fails + /// - The data type is not supported + /// + /// This method enables extension type checking without requiring a full + /// [`Field`] instance, useful when only metadata and data type are available. + /// + /// [`Field`]: crate::Field + fn try_from_parts( + metadata: &HashMap, + data_type: &DataType, + ) -> Result { + match metadata.get(EXTENSION_TYPE_NAME_KEY).map(|s| s.as_str()) { + Some(Self::NAME) => { + let ext_metadata = metadata + .get(EXTENSION_TYPE_METADATA_KEY) + .map(|s| s.as_str()); + let parsed = Self::deserialize_metadata(ext_metadata)?; + Self::try_new(data_type, parsed) + } + Some(name) => Err(ArrowError::InvalidArgumentError(format!( + "Extension type name mismatch: expected {}, got {name}", + Self::NAME + ))), + None => Err(ArrowError::InvalidArgumentError( + "Extension type name missing".to_string() + )), + } + } } diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index c4566e41bfa8..27d0b0c46e51 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -575,25 +575,7 @@ impl Field { /// } /// ``` pub fn try_extension_type(&self) -> Result { - // Check the extension name in the metadata - match self.extension_type_name() { - // It should match the name of the given extension type - Some(name) if name == E::NAME => { - // Deserialize the metadata and try to construct the extension - // type - E::deserialize_metadata(self.extension_type_metadata()) - .and_then(|metadata| E::try_new(self.data_type(), metadata)) - } - // Name mismatch - Some(name) => Err(ArrowError::InvalidArgumentError(format!( - "Field extension type name mismatch, expected {}, found {name}", - E::NAME - ))), - // Name missing - None => Err(ArrowError::InvalidArgumentError( - "Field extension type name missing".to_owned(), - )), - } + E::try_from_parts(self.metadata(), self.data_type()) } /// Returns an instance of the given [`ExtensionType`] of this [`Field`], From 47844617947104d438a0ea9076c75ef2f54329a2 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 26 Jan 2026 09:38:54 -0800 Subject: [PATCH 2/3] cleanups --- arrow-schema/src/extension/mod.rs | 13 +++++++++---- arrow-schema/src/field.rs | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arrow-schema/src/extension/mod.rs b/arrow-schema/src/extension/mod.rs index b356d0b61422..aed560029db8 100644 --- a/arrow-schema/src/extension/mod.rs +++ b/arrow-schema/src/extension/mod.rs @@ -272,24 +272,29 @@ pub trait ExtensionType: Sized { /// [`Field`] instance, useful when only metadata and data type are available. /// /// [`Field`]: crate::Field - fn try_from_parts( - metadata: &HashMap, + fn try_new_from_field_metadata( data_type: &DataType, + metadata: &HashMap, ) -> Result { + // Check the extension name in the metadata match metadata.get(EXTENSION_TYPE_NAME_KEY).map(|s| s.as_str()) { - Some(Self::NAME) => { + // It should match the name of the given extension type + Some(name) if name == Self::NAME => { + // Deserialize the metadata and try to construct the extension type let ext_metadata = metadata .get(EXTENSION_TYPE_METADATA_KEY) .map(|s| s.as_str()); let parsed = Self::deserialize_metadata(ext_metadata)?; Self::try_new(data_type, parsed) } + // Name mismatch Some(name) => Err(ArrowError::InvalidArgumentError(format!( "Extension type name mismatch: expected {}, got {name}", Self::NAME ))), + // Name missing None => Err(ArrowError::InvalidArgumentError( - "Extension type name missing".to_string() + "Extension type name missing".to_string(), )), } } diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index 27d0b0c46e51..a1c509abf2e0 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -575,7 +575,7 @@ impl Field { /// } /// ``` pub fn try_extension_type(&self) -> Result { - E::try_from_parts(self.metadata(), self.data_type()) + E::try_new_from_field_metadata(self.data_type(), self.metadata()) } /// Returns an instance of the given [`ExtensionType`] of this [`Field`], From 40a276e350c4138be16b4e7a045a6a82d1f31085 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 26 Jan 2026 12:20:05 -0800 Subject: [PATCH 3/3] fix a few more test expectations --- arrow-schema/src/extension/canonical/timestamp_with_offset.rs | 2 +- parquet/src/arrow/schema/virtual_type.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow-schema/src/extension/canonical/timestamp_with_offset.rs b/arrow-schema/src/extension/canonical/timestamp_with_offset.rs index 643025919d94..20df20bad922 100644 --- a/arrow-schema/src/extension/canonical/timestamp_with_offset.rs +++ b/arrow-schema/src/extension/canonical/timestamp_with_offset.rs @@ -300,7 +300,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn missing_name() { let field = make_valid_field_primitive(TimeUnit::Second) .with_metadata([(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())].into()); diff --git a/parquet/src/arrow/schema/virtual_type.rs b/parquet/src/arrow/schema/virtual_type.rs index b71753f61c93..657a76b73229 100644 --- a/parquet/src/arrow/schema/virtual_type.rs +++ b/parquet/src/arrow/schema/virtual_type.rs @@ -143,7 +143,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn row_number_missing_name() { let field = Field::new("", DataType::Int64, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())] @@ -203,7 +203,7 @@ mod tests { } #[test] - #[should_panic(expected = "Field extension type name missing")] + #[should_panic(expected = "Extension type name missing")] fn row_group_index_missing_name() { let field = Field::new("", DataType::Int64, false).with_metadata( [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]