From f80f256f00d3e850ebbc7c754326b5336f4d2660 Mon Sep 17 00:00:00 2001
From: sytherax
Date: Tue, 10 Feb 2026 21:51:34 +1100
Subject: [PATCH] feat: add serialization test for list element name and metadata support

---
// Review notes (free-form text after the three-dash line, ahead of the diff):
// - test_schema.rs: the new test serializes a derived `Root` holding one list
//   field and checks the list element's name and metadata on the struct
//   array's first field.
// - derive_struct.rs: adds `align_struct_values` and calls it at two sites just
//   before `StructArray::new`, so this patch changes the derive output as well
//   as the tests.
// - NOTE(review): generic arguments look stripped in this copy (`Vec,`,
//   `downcast_ref::()`); confirm against the original commit before applying.
 arrow_convert/tests/test_schema.rs        | 34 ++++++++++++++++++++++-
 arrow_convert_derive/src/derive_struct.rs | 27 ++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/arrow_convert/tests/test_schema.rs b/arrow_convert/tests/test_schema.rs
index aebfcea..bffb9bb 100644
--- a/arrow_convert/tests/test_schema.rs
+++ b/arrow_convert/tests/test_schema.rs
@@ -3,7 +3,8 @@ use std::sync::Arc;
 use arrow::datatypes::*;
 use arrow_convert::{
     field::{with_list_element_metadata, with_list_element_name, DEFAULT_FIELD_NAME},
-    ArrowField,
+    serialize::TryIntoArrow,
+    ArrowField, ArrowSerialize,
 };

 use pretty_assertions::assert_eq;
@@ -447,3 +448,34 @@ fn test_with_list_element_helpers_for_large_and_fixed_size_lists() {
     assert_eq!(fixed_element.name(), "level");
     assert_eq!(fixed_element.metadata().get("kind"), Some(&"depth".to_string()));
 }
+
+#[test]
+fn test_serialize_respects_list_element_name_and_metadata() { // checks the serialized array's schema, not only the derived data type
+    #[derive(Debug, ArrowField, ArrowSerialize)]
+    #[allow(dead_code)]
+    #[arrow_field(list_element_metadata(scope = "container"))]
+    struct Root {
+        #[arrow_field(list_element_name = "level", list_element_metadata(PARQUET::field_id = "9"))] // NOTE(review): asserted below under the key "PARQUET:field_id" (single colon); presumably the derive rewrites the path separator - confirm
+        bids: Vec, // NOTE(review): element type appears stripped in this copy - confirm
+    }
+
+    let rows = vec![Root { bids: vec![1, 2, 3] }];
+    let array: arrow::array::ArrayRef = rows.try_into_arrow().expect("serialization should succeed");
+    let struct_array = array
+        .as_any()
+        .downcast_ref::() // NOTE(review): target type appears stripped; the expect message below names StructArray
+        .expect("expected StructArray");
+
+    let fields = struct_array.fields();
+    let bids = &fields[0]; // `bids` is the only field declared on Root
+    let DataType::List(element) = bids.data_type() else {
+        panic!("expected list datatype");
+    };
+
+    assert_eq!(element.name(), "level"); // element name set by the field-level attribute
+    assert_eq!(element.metadata().get("scope"), Some(&"container".to_string())); // key declared by the struct-level attribute
+    assert_eq!(
+        element.metadata().get("PARQUET:field_id"),
+        Some(&"9".to_string())
+    );
+}
diff --git a/arrow_convert_derive/src/derive_struct.rs b/arrow_convert_derive/src/derive_struct.rs
index f99742a..c97fa27 100644
--- a/arrow_convert_derive/src/derive_struct.rs
+++ b/arrow_convert_derive/src/derive_struct.rs
@@ -295,6 +295,31 @@ pub fn expand_serialize(input: DeriveStruct) -> TokenStream {
                 }
                 Ok(())
             }
+
+            fn align_struct_values( // returns `values` with each array matched to the data type of the field at the same position
+                values: Vec, // NOTE(review): element type appears stripped in this copy - confirm
+                fields: &arrow::datatypes::Fields,
+            ) -> Vec {
+                values
+                    .into_iter()
+                    .zip(fields.iter()) // positional pairing; zip stops at the shorter side and lengths are not checked here
+                    .map(|(value, field)| {
+                        if value.data_type() == field.data_type() { // already the declared type: passed through untouched
+                            value
+                        } else {
+                            arrow::compute::cast(value.as_ref(), field.data_type()).unwrap_or_else(|e| { // otherwise cast to the declared type; a failed cast panics
+                                panic!(
+                                    "failed to cast field '{}' from {:?} to {:?}: {}",
+                                    field.name(),
+                                    value.data_type(),
+                                    field.data_type(),
+                                    e
+                                )
+                            })
+                        }
+                    })
+                    .collect()
+            }
         }
     };

@@ -346,6 +371,7 @@ pub fn expand_serialize(input: DeriveStruct) -> TokenStream {
                     .clone() else {
                     panic!("datatype is not struct")
                 };
+                let values = #mutable_array_name::align_struct_values(values, &fields); // shadow `values` with the aligned arrays before building the StructArray

                 std::sync::Arc::new(arrow::array::StructArray::new(
                     fields,
@@ -364,6 +390,7 @@ pub fn expand_serialize(input: DeriveStruct) -> TokenStream {
                     .clone() else {
                     panic!("datatype is not struct")
                 };
+                let values = #mutable_array_name::align_struct_values(values, &fields); // same alignment step as the call site in the previous hunk

                 std::sync::Arc::new(arrow::array::StructArray::new(
                     fields,