From 1546bdc252c02288de9f34efc20cda8d0e85e8b5 Mon Sep 17 00:00:00 2001 From: sytherax Date: Tue, 10 Feb 2026 19:15:21 +1100 Subject: [PATCH 1/2] feat: add support for field metadata and list element annotations in ArrowField --- README.md | 33 ++++++ arrow_convert/src/field.rs | 59 ++++++++++ arrow_convert/tests/test_schema.rs | 131 ++++++++++++++++++++++ arrow_convert_derive/src/derive_struct.rs | 61 +++++++++- arrow_convert_derive/src/input.rs | 117 +++++++++++++++++++ 5 files changed, 400 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 944cd20..05de141 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,39 @@ Precedence: `#[arrow_field(name)]` > `rename_all` > Rust field name. Supported `rename_all` values: `lowercase`, `UPPERCASE`, `camelCase`, `PascalCase`, `snake_case`, `SCREAMING_SNAKE_CASE`, `kebab-case`, `SCREAMING-KEBAB-CASE`. +### Field Metadata and List Element Annotations + +Arrow field metadata can be attached via `metadata(...)` and list element metadata can be attached via `list_element_metadata(...)`. + +```rust +# use arrow_convert::ArrowField; +#[derive(ArrowField)] +struct SchemaAnnotated { + #[arrow_field(metadata(role = "top", PARQUET::field_id = "7"))] + top_level: i64, + #[arrow_field( + list_element_name = "element", + list_element_metadata(PARQUET::field_id = "9", scope = "book") + )] + prices: Vec, +} +``` + +List element naming can be set at the container level and overridden per field: + +```rust +# use arrow_convert::ArrowField; +#[derive(ArrowField)] +#[arrow_field(list_element_name = "entry")] +struct Lists { + bids: Vec, // list child field name -> "entry" + #[arrow_field(list_element_name = "level")] + asks: Vec, // list child field name -> "level" +} +``` + +For list element metadata, precedence is field-level override over container-level defaults for matching keys. + ### i128 i128 represents a decimal number and requires the precision and scale to be specified to be used as an Arrow data type. The precision and scale can be specified by using a type override via the `I128` type. diff --git a/arrow_convert/src/field.rs b/arrow_convert/src/field.rs index 0cb1c98..7f58881 100644 --- a/arrow_convert/src/field.rs +++ b/arrow_convert/src/field.rs @@ -9,6 +9,65 @@ use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; /// The default field name used when a specific name is not provided. pub const DEFAULT_FIELD_NAME: &str = "_item"; +/// Overrides the element field name for list-like datatypes on the given field. +/// +/// This only affects the direct child field of: +/// - `DataType::List` +/// - `DataType::LargeList` +/// - `DataType::FixedSizeList` +/// +/// For other datatypes, this function is a no-op. +pub fn with_list_element_name(field: Field, list_element_name: Option<&str>) -> Field { + let Some(list_element_name) = list_element_name else { + return field; + }; + + match field.data_type().clone() { + DataType::List(child) => field.with_data_type(DataType::List(Arc::new( + child.as_ref().clone().with_name(list_element_name), + ))), + DataType::LargeList(child) => field.with_data_type(DataType::LargeList(Arc::new( + child.as_ref().clone().with_name(list_element_name), + ))), + DataType::FixedSizeList(child, size) => field.with_data_type(DataType::FixedSizeList( + Arc::new(child.as_ref().clone().with_name(list_element_name)), + size, + )), + _ => field, + } +} + +/// Adds or overrides metadata entries on a field. +pub fn with_field_metadata(mut field: Field, metadata: Vec<(String, String)>) -> Field { + for (key, value) in metadata { + field.metadata_mut().insert(key, value); + } + field +} + +/// Adds or overrides metadata entries on the list-like element child field. +pub fn with_list_element_metadata(field: Field, metadata: Vec<(String, String)>) -> Field { + if metadata.is_empty() { + return field; + } + + match field.data_type().clone() { + DataType::List(child) => { + let child = with_field_metadata(child.as_ref().clone(), metadata); + field.with_data_type(DataType::List(Arc::new(child))) + } + DataType::LargeList(child) => { + let child = with_field_metadata(child.as_ref().clone(), metadata); + field.with_data_type(DataType::LargeList(Arc::new(child))) + } + DataType::FixedSizeList(child, size) => { + let child = with_field_metadata(child.as_ref().clone(), metadata); + field.with_data_type(DataType::FixedSizeList(Arc::new(child), size)) + } + _ => field, + } +} + /// Trait implemented by all types that can be used as an Arrow field. /// /// Implementations are provided for types already supported by the arrow crate: diff --git a/arrow_convert/tests/test_schema.rs b/arrow_convert/tests/test_schema.rs index 9928de6..f304c99 100644 --- a/arrow_convert/tests/test_schema.rs +++ b/arrow_convert/tests/test_schema.rs @@ -253,3 +253,134 @@ fn test_large_string_schema() { ))) ); } + +#[test] +fn test_field_name_override_with_rename_all() { + #[derive(Debug, ArrowField)] + #[allow(dead_code)] + #[arrow_field(rename_all = "camelCase")] + struct Root { + plain_field: i32, + #[arrow_field(name = "custom_name")] + renamed_field: i32, + r#type: i32, + } + + let DataType::Struct(fields) = ::data_type() else { + panic!("expected struct datatype"); + }; + + let names: Vec<_> = fields.iter().map(|field| field.name().to_string()).collect(); + assert_eq!(names, vec!["plainField", "custom_name", "type"]); +} + +#[test] +fn test_rename_all_composes_with_type_name_and_skip() { + #[derive(Debug, ArrowField)] + #[allow(dead_code)] + #[allow(non_snake_case)] + #[arrow_field(rename_all = "snake_case")] + struct Root { + plainField: i32, + #[arrow_field(type = "arrow_convert::field::LargeString")] + optionalLabel: Option, + #[arrow_field(type = "arrow_convert::field::LargeVec", name = "custom_list")] + ignoredNameByRenameAll: Vec, + #[arrow_field(skip)] + shouldSkip: i32, + } + + let DataType::Struct(fields) = ::data_type() else { + panic!("expected struct datatype"); + }; + + assert_eq!(fields.len(), 3); + + assert_eq!(fields[0].name(), "plain_field"); + assert_eq!(fields[0].data_type(), &DataType::Int32); + assert!(!fields[0].is_nullable()); + + assert_eq!(fields[1].name(), "optional_label"); + assert_eq!(fields[1].data_type(), &DataType::LargeUtf8); + assert!(!fields[1].is_nullable()); + + assert_eq!(fields[2].name(), "custom_list"); + assert_eq!( + fields[2].data_type(), + &DataType::LargeList(Arc::new(Field::new(DEFAULT_FIELD_NAME, DataType::Int64, false))) + ); + assert!(!fields[2].is_nullable()); +} + +#[test] +fn test_list_element_name_container_and_field_override() { + #[derive(Debug, ArrowField)] + #[allow(dead_code)] + #[arrow_field(list_element_name = "entry")] + struct Root { + numbers: Vec, + #[arrow_field(list_element_name = "node")] + labels: Vec, + scalar: i64, + } + + let DataType::Struct(fields) = ::data_type() else { + panic!("expected struct datatype"); + }; + + assert_eq!( + fields[0].data_type(), + &DataType::List(Arc::new(Field::new("entry", DataType::Int32, false))) + ); + assert_eq!( + fields[1].data_type(), + &DataType::List(Arc::new(Field::new("node", DataType::Utf8, false))) + ); + assert_eq!(fields[2].data_type(), &DataType::Int64); +} + +#[test] +fn test_metadata_support_for_parquet_field_id_keys() { + #[derive(Debug, ArrowField)] + #[allow(dead_code)] + #[arrow_field(list_element_metadata(scope = "container", PARQUET::field_id = "101"))] + struct Root { + #[arrow_field( + metadata(role = "top", PARQUET::field_id = "7"), + list_element_metadata(scope = "field", level = "1", PARQUET::field_id = "9") + )] + bids: Vec, + asks: Vec, + } + + let DataType::Struct(fields) = ::data_type() else { + panic!("expected struct datatype"); + }; + + let bids = &fields[0]; + assert_eq!(bids.metadata().get("role"), Some(&"top".to_string())); + assert_eq!(bids.metadata().get("PARQUET:field_id"), Some(&"7".to_string())); + let DataType::List(bids_element) = bids.data_type() else { + panic!("expected list datatype for bids"); + }; + assert_eq!(bids_element.metadata().get("scope"), Some(&"field".to_string())); + assert_eq!(bids_element.metadata().get("level"), Some(&"1".to_string())); + assert_eq!( + bids_element.metadata().get("PARQUET:field_id"), + Some(&"9".to_string()) + ); + + let asks = &fields[1]; + assert!(asks.metadata().get("role").is_none()); + let DataType::List(asks_element) = asks.data_type() else { + panic!("expected list datatype for asks"); + }; + assert_eq!( + asks_element.metadata().get("scope"), + Some(&"container".to_string()) + ); + assert_eq!( + asks_element.metadata().get("PARQUET:field_id"), + Some(&"101".to_string()) + ); +} diff --git a/arrow_convert_derive/src/derive_struct.rs b/arrow_convert_derive/src/derive_struct.rs index ca28477..f99742a 100644 --- a/arrow_convert_derive/src/derive_struct.rs +++ b/arrow_convert_derive/src/derive_struct.rs @@ -14,6 +14,9 @@ struct Common<'a> { field_indices: Vec, field_types: Vec<&'a syn::Type>, field_names: Vec, + field_list_element_names: Vec>, + field_metadata: Vec>, + field_list_element_metadata: Vec>, } impl<'a> From<&'a DeriveStruct> for Common<'a> { @@ -85,6 +88,15 @@ impl<'a> From<&'a DeriveStruct> for Common<'a> { .enumerate() .map(|(id, field)| field.effective_name(id, input.rename_all)) .collect::>(); + let field_list_element_names = fields + .iter() + .map(|field| field.effective_list_element_name(input.list_element_name.as_deref())) + .collect::>(); + let field_metadata = fields.iter().map(|field| field.metadata.clone()).collect::>(); + let field_list_element_metadata = fields + .iter() + .map(|field| field.effective_list_element_metadata(&input.list_element_metadata)) + .collect::>(); Self { original_name, @@ -95,6 +107,9 @@ impl<'a> From<&'a DeriveStruct> for Common<'a> { field_indices, field_types, field_names, + field_list_element_names, + field_metadata, + field_list_element_metadata, } } } @@ -104,8 +119,38 @@ pub fn expand_field(input: DeriveStruct) -> TokenStream { original_name, field_types, field_names, + field_list_element_names, + field_metadata, + field_list_element_metadata, .. } = (&input).into(); + let field_list_element_name_exprs = field_list_element_names + .iter() + .map(|name| match name { + Some(name) => quote!(Some(#name)), + None => quote!(None), + }) + .collect::>(); + let field_metadata_exprs = field_metadata + .iter() + .map(|entries| { + let entries = entries + .iter() + .map(|(k, v)| quote!((#k.to_string(), #v.to_string()))) + .collect::>(); + quote!(vec![#(#entries),*]) + }) + .collect::>(); + let field_list_element_metadata_exprs = field_list_element_metadata + .iter() + .map(|entries| { + let entries = entries + .iter() + .map(|(k, v)| quote!((#k.to_string(), #v.to_string()))) + .collect::>(); + quote!(vec![#(#entries),*]) + }) + .collect::>(); let arrow_schema_impl = if input.fields.len() == 1 && input.is_transparent { quote! {} @@ -115,7 +160,21 @@ pub fn expand_field(input: DeriveStruct) -> TokenStream { pub fn arrow_schema() -> arrow::datatypes::Schema { arrow::datatypes::Schema::new(vec![ #( - <#field_types as arrow_convert::field::ArrowField>::field(#field_names), + { + let field = <#field_types as arrow_convert::field::ArrowField>::field(#field_names); + let field = arrow_convert::field::with_list_element_name( + field, + #field_list_element_name_exprs, + ); + let field = arrow_convert::field::with_field_metadata( + field, + #field_metadata_exprs, + ); + arrow_convert::field::with_list_element_metadata( + field, + #field_list_element_metadata_exprs, + ) + }, )* ]) } diff --git a/arrow_convert_derive/src/input.rs b/arrow_convert_derive/src/input.rs index 26d3a01..612ef5d 100644 --- a/arrow_convert_derive/src/input.rs +++ b/arrow_convert_derive/src/input.rs @@ -10,6 +10,9 @@ use crate::case::RenameRule; pub const ARROW_FIELD: &str = "arrow_field"; pub const FIELD_TYPE: &str = "type"; pub const FIELD_NAME: &str = "name"; +pub const FIELD_LIST_ELEMENT_NAME: &str = "list_element_name"; +pub const FIELD_METADATA: &str = "metadata"; +pub const FIELD_LIST_ELEMENT_METADATA: &str = "list_element_metadata"; pub const FIELD_SKIP: &str = "skip"; pub const FIELD_RENAME_ALL: &str = "rename_all"; pub const UNION_TYPE: &str = "type"; @@ -31,6 +34,10 @@ pub struct DeriveStruct { pub is_transparent: bool, /// Container-level rename_all rule pub rename_all: Option, + /// Container-level list element field name override. + pub list_element_name: Option, + /// Container-level list element metadata default. + pub list_element_metadata: Vec<(String, String)>, } pub struct DeriveEnum { @@ -48,12 +55,19 @@ pub struct ContainerAttrs { pub transparent: Option, /// Container-level rename_all rule pub rename_all: Option, + /// Container-level list element field name override. + pub list_element_name: Option, + /// Container-level list element metadata default. + pub list_element_metadata: Vec<(String, String)>, } /// All field attributes pub struct FieldAttrs { pub field_type: Option, pub field_name: Option, + pub list_element_name: Option, + pub metadata: Vec<(String, String)>, + pub list_element_metadata: Vec<(String, String)>, pub skip: bool, } @@ -61,6 +75,9 @@ pub struct DeriveField { pub syn: syn::Field, pub field_type: syn::Type, pub field_name: Option, + pub list_element_name: Option, + pub metadata: Vec<(String, String)>, + pub list_element_metadata: Vec<(String, String)>, pub skip: bool, } @@ -96,6 +113,8 @@ impl ContainerAttrs { let mut is_dense: Option = None; let mut is_transparent: Option = None; let mut rename_all: Option = None; + let mut list_element_name: Option = None; + let mut list_element_metadata: Vec<(String, String)> = Vec::new(); for attr in attrs { if attr.path().is_ident(ARROW_FIELD) { @@ -131,6 +150,23 @@ impl ContainerAttrs { rename_all = Some(rule); } Ok(()) + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_NAME) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected value for list_element_name")); + }; + list_element_name = Some(string.value()); + Ok(()) + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_METADATA) { + nested.parse_nested_meta(|entry| { + let key = metadata_key(&entry.path)?; + let value = entry.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(entry.error("Expected string value for metadata entry")); + }; + list_element_metadata.push((key, string.value())); + Ok(()) + }) } else { Err(meta.error("Unexpected attribute")) } @@ -146,6 +182,8 @@ impl ContainerAttrs { is_dense, transparent: is_transparent, rename_all, + list_element_name, + list_element_metadata, } } } @@ -154,6 +192,9 @@ impl FieldAttrs { pub fn from_ast(input: &[syn::Attribute]) -> FieldAttrs { let mut field_type: Option = None; let mut field_name: Option = None; + let mut list_element_name: Option = None; + let mut metadata: Vec<(String, String)> = Vec::new(); + let mut list_element_metadata: Vec<(String, String)> = Vec::new(); let mut skip = false; for attr in input { @@ -178,6 +219,32 @@ impl FieldAttrs { return Err(meta.error("Unexpected attribute")); }; field_name = Some(string.value()); + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_NAME) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(meta.error("Unexpected attribute")); + }; + list_element_name = Some(string.value()); + } else if nested.path.is_ident(FIELD_METADATA) { + nested.parse_nested_meta(|entry| { + let key = metadata_key(&entry.path)?; + let value = entry.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(entry.error("Expected string value for metadata entry")); + }; + metadata.push((key, string.value())); + Ok(()) + })?; + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_METADATA) { + nested.parse_nested_meta(|entry| { + let key = metadata_key(&entry.path)?; + let value = entry.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(entry.error("Expected string value for metadata entry")); + }; + list_element_metadata.push((key, string.value())); + Ok(()) + })?; } else { return Err(meta.error("Unexpected attribute")); } @@ -191,6 +258,9 @@ impl FieldAttrs { FieldAttrs { field_type, field_name, + list_element_name, + metadata, + list_element_metadata, skip, } } @@ -215,6 +285,8 @@ impl DeriveStruct { fields: ast.fields.iter().map(DeriveField::from_ast).collect::>(), is_transparent, rename_all: container_attrs.rename_all, + list_element_name: container_attrs.list_element_name, + list_element_metadata: container_attrs.list_element_metadata, } } } @@ -243,6 +315,9 @@ impl DeriveField { syn: input.clone(), field_type: attrs.field_type.unwrap_or_else(|| input.ty.clone()), field_name: attrs.field_name, + list_element_name: attrs.list_element_name, + metadata: attrs.metadata, + list_element_metadata: attrs.list_element_metadata, skip: attrs.skip, } } @@ -268,6 +343,48 @@ impl DeriveField { rust_name } + + /// Get the effective list element field name considering precedence: + /// arrow_field(list_element_name) > container list_element_name + pub fn effective_list_element_name(&self, container_list_element_name: Option<&str>) -> Option { + self.list_element_name + .clone() + .or_else(|| container_list_element_name.map(str::to_string)) + } + + pub fn effective_list_element_metadata( + &self, + container_list_element_metadata: &[(String, String)], + ) -> Vec<(String, String)> { + merge_metadata_entries(container_list_element_metadata, &self.list_element_metadata) + } +} + +fn merge_metadata_entries(base: &[(String, String)], overrides: &[(String, String)]) -> Vec<(String, String)> { + let mut merged = base.to_vec(); + for (key, value) in overrides { + if let Some((_, existing_value)) = merged.iter_mut().find(|(k, _)| k == key) { + *existing_value = value.clone(); + } else { + merged.push((key.clone(), value.clone())); + } + } + merged +} + +fn metadata_key(path: &syn::Path) -> syn::Result { + if path.segments.is_empty() || path.segments.iter().any(|s| !s.arguments.is_empty()) { + return Err(syn::Error::new( + path.span(), + "Expected metadata key identifier path", + )); + } + Ok(path + .segments + .iter() + .map(|segment| segment.ident.to_string()) + .collect::>() + .join(":")) } impl DeriveVariant { From 51bd91179c964c80d864110c3c922eabef3ef17d Mon Sep 17 00:00:00 2001 From: sytherax Date: Tue, 10 Feb 2026 19:31:10 +1100 Subject: [PATCH 2/2] feat: add tests for list element metadata and error handling in ArrowField --- arrow_convert/tests/test_schema.rs | 65 +++++- .../tests/ui/enum_unexpected_mode_value.rs | 9 + .../ui/enum_unexpected_mode_value.stderr | 5 + .../ui/struct_metadata_value_not_string.rs | 9 + .../struct_metadata_value_not_string.stderr | 5 + .../tests/ui/struct_rename_all_not_string.rs | 9 + .../ui/struct_rename_all_not_string.stderr | 5 + arrow_convert_derive/src/input.rs | 213 +++++++++--------- 8 files changed, 209 insertions(+), 111 deletions(-) create mode 100644 arrow_convert/tests/ui/enum_unexpected_mode_value.rs create mode 100644 arrow_convert/tests/ui/enum_unexpected_mode_value.stderr create mode 100644 arrow_convert/tests/ui/struct_metadata_value_not_string.rs create mode 100644 arrow_convert/tests/ui/struct_metadata_value_not_string.stderr create mode 100644 arrow_convert/tests/ui/struct_rename_all_not_string.rs create mode 100644 arrow_convert/tests/ui/struct_rename_all_not_string.stderr diff --git a/arrow_convert/tests/test_schema.rs b/arrow_convert/tests/test_schema.rs index f304c99..aebfcea 100644 --- a/arrow_convert/tests/test_schema.rs +++ b/arrow_convert/tests/test_schema.rs @@ -1,7 +1,10 @@ use std::sync::Arc; use arrow::datatypes::*; -use arrow_convert::{field::DEFAULT_FIELD_NAME, ArrowField}; +use arrow_convert::{ + field::{with_list_element_metadata, with_list_element_name, DEFAULT_FIELD_NAME}, + ArrowField, +}; use pretty_assertions::assert_eq; #[test] @@ -384,3 +387,63 @@ fn test_metadata_support_for_parquet_field_id_keys() { Some(&"101".to_string()) ); } + +#[test] +fn test_list_element_metadata_field_override_wins_with_duplicate_container_keys() { + #[derive(Debug, ArrowField)] + #[allow(dead_code)] + #[arrow_field( + list_element_metadata(scope = "container_a"), + list_element_metadata(scope = "container_b", keep = "container") + )] + struct Root { + #[arrow_field(list_element_metadata(scope = "field"))] + levels: Vec, + } + + let DataType::Struct(fields) = ::data_type() else { + panic!("expected struct datatype"); + }; + let DataType::List(levels_element) = fields[0].data_type() else { + panic!("expected list datatype"); + }; + + assert_eq!( + levels_element.metadata().get("scope"), + Some(&"field".to_string()) + ); + assert_eq!( + levels_element.metadata().get("keep"), + Some(&"container".to_string()) + ); +} + +#[test] +fn test_with_list_element_helpers_for_large_and_fixed_size_lists() { + let large_field = Field::new( + "values", + DataType::LargeList(Arc::new(Field::new("_item", DataType::Int64, false))), + false, + ); + let large_named = with_list_element_name(large_field, Some("element")); + let large_named = with_list_element_metadata(large_named, vec![("scope".to_string(), "book".to_string())]); + let DataType::LargeList(large_element) = large_named.data_type() else { + panic!("expected LargeList"); + }; + assert_eq!(large_element.name(), "element"); + assert_eq!(large_element.metadata().get("scope"), Some(&"book".to_string())); + + let fixed_field = Field::new( + "values", + DataType::FixedSizeList(Arc::new(Field::new("_item", DataType::Int64, false)), 3), + false, + ); + let fixed_named = with_list_element_name(fixed_field, Some("level")); + let fixed_named = with_list_element_metadata(fixed_named, vec![("kind".to_string(), "depth".to_string())]); + let DataType::FixedSizeList(fixed_element, size) = fixed_named.data_type() else { + panic!("expected FixedSizeList"); + }; + assert_eq!(*size, 3); + assert_eq!(fixed_element.name(), "level"); + assert_eq!(fixed_element.metadata().get("kind"), Some(&"depth".to_string())); +} diff --git a/arrow_convert/tests/ui/enum_unexpected_mode_value.rs b/arrow_convert/tests/ui/enum_unexpected_mode_value.rs new file mode 100644 index 0000000..81ff604 --- /dev/null +++ b/arrow_convert/tests/ui/enum_unexpected_mode_value.rs @@ -0,0 +1,9 @@ +use arrow_convert::ArrowField; + +#[derive(ArrowField)] +#[arrow_field(type = "invalid_mode")] +enum BadMode { + A, +} + +fn main() {} diff --git a/arrow_convert/tests/ui/enum_unexpected_mode_value.stderr b/arrow_convert/tests/ui/enum_unexpected_mode_value.stderr new file mode 100644 index 0000000..8b8f6fc --- /dev/null +++ b/arrow_convert/tests/ui/enum_unexpected_mode_value.stderr @@ -0,0 +1,5 @@ +error: Unexpected value for mode + --> tests/ui/enum_unexpected_mode_value.rs:4:15 + | +4 | #[arrow_field(type = "invalid_mode")] + | ^^^^ diff --git a/arrow_convert/tests/ui/struct_metadata_value_not_string.rs b/arrow_convert/tests/ui/struct_metadata_value_not_string.rs new file mode 100644 index 0000000..746baa2 --- /dev/null +++ b/arrow_convert/tests/ui/struct_metadata_value_not_string.rs @@ -0,0 +1,9 @@ +use arrow_convert::ArrowField; + +#[derive(ArrowField)] +struct MetadataNotString { + #[arrow_field(metadata(role = 1))] + value: i64, +} + +fn main() {} diff --git a/arrow_convert/tests/ui/struct_metadata_value_not_string.stderr b/arrow_convert/tests/ui/struct_metadata_value_not_string.stderr new file mode 100644 index 0000000..daf1834 --- /dev/null +++ b/arrow_convert/tests/ui/struct_metadata_value_not_string.stderr @@ -0,0 +1,5 @@ +error: Expected string value for metadata entry + --> tests/ui/struct_metadata_value_not_string.rs:5:28 + | +5 | #[arrow_field(metadata(role = 1))] + | ^^^^ diff --git a/arrow_convert/tests/ui/struct_rename_all_not_string.rs b/arrow_convert/tests/ui/struct_rename_all_not_string.rs new file mode 100644 index 0000000..eb24196 --- /dev/null +++ b/arrow_convert/tests/ui/struct_rename_all_not_string.rs @@ -0,0 +1,9 @@ +use arrow_convert::ArrowField; + +#[derive(ArrowField)] +#[arrow_field(rename_all = 42)] +struct RenameAllNotString { + value: i64, +} + +fn main() {} diff --git a/arrow_convert/tests/ui/struct_rename_all_not_string.stderr b/arrow_convert/tests/ui/struct_rename_all_not_string.stderr new file mode 100644 index 0000000..c64f042 --- /dev/null +++ b/arrow_convert/tests/ui/struct_rename_all_not_string.stderr @@ -0,0 +1,5 @@ +error: Unexpected value for rename_all + --> tests/ui/struct_rename_all_not_string.rs:4:15 + | +4 | #[arrow_field(rename_all = 42)] + | ^^^^^^^^^^ diff --git a/arrow_convert_derive/src/input.rs b/arrow_convert_derive/src/input.rs index 612ef5d..0a60b3b 100644 --- a/arrow_convert_derive/src/input.rs +++ b/arrow_convert_derive/src/input.rs @@ -3,7 +3,7 @@ use proc_macro_error2::abort; use quote::format_ident; use syn::spanned::Spanned; -use syn::{DeriveInput, Ident, Lit, Meta, Visibility}; +use syn::{DeriveInput, Ident, Lit, Visibility}; use crate::case::RenameRule; @@ -118,63 +118,59 @@ impl ContainerAttrs { for attr in attrs { if attr.path().is_ident(ARROW_FIELD) { - let _ = attr.parse_nested_meta(|meta| { - if let Meta::List(list) = &attr.meta { - list.parse_nested_meta(|nested| { - if nested.path.is_ident(TRANSPARENT) { - is_transparent = Some(nested.path.span()); - Ok(()) - } else if nested.path.is_ident(UNION_TYPE) { - let value = nested.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(nested.error("Unexpected value for mode")); - }; - - match string.value().as_ref() { - UNION_TYPE_DENSE => { - is_dense = Some(true); - Ok(()) - } - UNION_TYPE_SPARSE => { - is_dense = Some(false); - Ok(()) - } - _ => Err(nested.error("Unexpected value for mode")), - } - } else if nested.path.is_ident(FIELD_RENAME_ALL) { - let value = nested.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(nested.error("Unexpected value for rename_all")); - }; - if let Some(rule) = RenameRule::from_str(&string.value()) { - rename_all = Some(rule); - } + if let Err(err) = attr.parse_nested_meta(|nested| { + if nested.path.is_ident(TRANSPARENT) { + is_transparent = Some(nested.path.span()); + Ok(()) + } else if nested.path.is_ident(UNION_TYPE) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected value for mode")); + }; + + match string.value().as_ref() { + UNION_TYPE_DENSE => { + is_dense = Some(true); Ok(()) - } else if nested.path.is_ident(FIELD_LIST_ELEMENT_NAME) { - let value = nested.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(nested.error("Unexpected value for list_element_name")); - }; - list_element_name = Some(string.value()); + } + UNION_TYPE_SPARSE => { + is_dense = Some(false); Ok(()) - } else if nested.path.is_ident(FIELD_LIST_ELEMENT_METADATA) { - nested.parse_nested_meta(|entry| { - let key = metadata_key(&entry.path)?; - let value = entry.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(entry.error("Expected string value for metadata entry")); - }; - list_element_metadata.push((key, string.value())); - Ok(()) - }) - } else { - Err(meta.error("Unexpected attribute")) } - })?; - }; - - Ok(()) - }); + _ => Err(nested.error("Unexpected value for mode")), + } + } else if nested.path.is_ident(FIELD_RENAME_ALL) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected value for rename_all")); + }; + if let Some(rule) = RenameRule::from_str(&string.value()) { + rename_all = Some(rule); + } + Ok(()) + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_NAME) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected value for list_element_name")); + }; + list_element_name = Some(string.value()); + Ok(()) + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_METADATA) { + nested.parse_nested_meta(|entry| { + let key = metadata_key(&entry.path)?; + let value = entry.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(entry.error("Expected string value for metadata entry")); + }; + list_element_metadata.push((key, string.value())); + Ok(()) + }) + } else { + Err(nested.error("Unexpected attribute")) + } + }) { + abort!(err.span(), "{}", err); + } } } @@ -199,59 +195,57 @@ impl FieldAttrs { for attr in input { if attr.path().is_ident(ARROW_FIELD) { - attr.parse_nested_meta(|meta| { - let Meta::List(list) = &attr.meta else { - return Err(meta.error("Unexpected attribute")); - }; - - list.parse_nested_meta(|nested| { - if nested.path.is_ident(FIELD_SKIP) { - skip = true; - } else if nested.path.is_ident(FIELD_TYPE) { - let value = nested.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(meta.error("Unexpected attribute")); - }; - field_type = Some(syn::parse_str(&string.value())?); - } else if nested.path.is_ident(FIELD_NAME) { - let value = nested.value()?; + if let Err(err) = attr.parse_nested_meta(|nested| { + if nested.path.is_ident(FIELD_SKIP) { + skip = true; + Ok(()) + } else if nested.path.is_ident(FIELD_TYPE) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected attribute")); + }; + field_type = Some(syn::parse_str(&string.value())?); + Ok(()) + } else if nested.path.is_ident(FIELD_NAME) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected attribute")); + }; + field_name = Some(string.value()); + Ok(()) + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_NAME) { + let value = nested.value()?; + let Lit::Str(string) = value.parse()? else { + return Err(nested.error("Unexpected attribute")); + }; + list_element_name = Some(string.value()); + Ok(()) + } else if nested.path.is_ident(FIELD_METADATA) { + nested.parse_nested_meta(|entry| { + let key = metadata_key(&entry.path)?; + let value = entry.value()?; let Lit::Str(string) = value.parse()? else { - return Err(meta.error("Unexpected attribute")); + return Err(entry.error("Expected string value for metadata entry")); }; - field_name = Some(string.value()); - } else if nested.path.is_ident(FIELD_LIST_ELEMENT_NAME) { - let value = nested.value()?; + metadata.push((key, string.value())); + Ok(()) + }) + } else if nested.path.is_ident(FIELD_LIST_ELEMENT_METADATA) { + nested.parse_nested_meta(|entry| { + let key = metadata_key(&entry.path)?; + let value = entry.value()?; let Lit::Str(string) = value.parse()? else { - return Err(meta.error("Unexpected attribute")); + return Err(entry.error("Expected string value for metadata entry")); }; - list_element_name = Some(string.value()); - } else if nested.path.is_ident(FIELD_METADATA) { - nested.parse_nested_meta(|entry| { - let key = metadata_key(&entry.path)?; - let value = entry.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(entry.error("Expected string value for metadata entry")); - }; - metadata.push((key, string.value())); - Ok(()) - })?; - } else if nested.path.is_ident(FIELD_LIST_ELEMENT_METADATA) { - nested.parse_nested_meta(|entry| { - let key = metadata_key(&entry.path)?; - let value = entry.value()?; - let Lit::Str(string) = value.parse()? else { - return Err(entry.error("Expected string value for metadata entry")); - }; - list_element_metadata.push((key, string.value())); - Ok(()) - })?; - } else { - return Err(meta.error("Unexpected attribute")); - } - Ok(()) - }) - }) - .unwrap_or_default(); + list_element_metadata.push((key, string.value())); + Ok(()) + }) + } else { + Err(nested.error("Unexpected attribute")) + } + }) { + abort!(err.span(), "{}", err); + } } } @@ -361,13 +355,12 @@ impl DeriveField { } fn merge_metadata_entries(base: &[(String, String)], overrides: &[(String, String)]) -> Vec<(String, String)> { - let mut merged = base.to_vec(); - for (key, value) in overrides { - if let Some((_, existing_value)) = merged.iter_mut().find(|(k, _)| k == key) { - *existing_value = value.clone(); - } else { - merged.push((key.clone(), value.clone())); + let mut merged: Vec<(String, String)> = Vec::new(); + for (key, value) in base.iter().chain(overrides.iter()) { + if let Some(existing_idx) = merged.iter().position(|(k, _)| k == key) { + merged.remove(existing_idx); } + merged.push((key.clone(), value.clone())); } merged }