Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,39 @@ Precedence: `#[arrow_field(name)]` > `rename_all` > Rust field name.

Supported `rename_all` values: `lowercase`, `UPPERCASE`, `camelCase`, `PascalCase`, `snake_case`, `SCREAMING_SNAKE_CASE`, `kebab-case`, `SCREAMING-KEBAB-CASE`.

### Field Metadata and List Element Annotations

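Arrow field metadata can be attached to a field via `metadata(...)`, and metadata for the element (child) field of a list can be attached via `list_element_metadata(...)`. Each entry is a `key = "value"` pair whose value must be a string literal. A namespaced key written as `PARQUET::field_id` is stored under the metadata key `PARQUET:field_id`.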

```rust
# use arrow_convert::ArrowField;
#[derive(ArrowField)]
struct SchemaAnnotated {
#[arrow_field(metadata(role = "top", PARQUET::field_id = "7"))]
top_level: i64,
#[arrow_field(
list_element_name = "element",
list_element_metadata(PARQUET::field_id = "9", scope = "book")
)]
prices: Vec<i64>,
}
```

List element naming can be set at the container level and overridden per field:

```rust
# use arrow_convert::ArrowField;
#[derive(ArrowField)]
#[arrow_field(list_element_name = "entry")]
struct Lists {
bids: Vec<i64>, // list child field name -> "entry"
#[arrow_field(list_element_name = "level")]
asks: Vec<i64>, // list child field name -> "level"
}
```

For list element metadata, container-level entries act as defaults: when a field-level entry uses the same key, the field-level value wins, and container-level entries whose keys are not overridden are kept.

### i128

i128 represents a decimal number and requires the precision and scale to be specified to be used as an Arrow data type. The precision and scale can be specified by using a type override via the `I128` type.
Expand Down
59 changes: 59 additions & 0 deletions arrow_convert/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,65 @@ use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
/// The default field name used when a specific name is not provided.
pub const DEFAULT_FIELD_NAME: &str = "_item";

/// Renames the element (child) field of a list-like field.
///
/// Only the direct child of these datatypes is renamed:
/// - `DataType::List`
/// - `DataType::LargeList`
/// - `DataType::FixedSizeList`
///
/// The field is returned unchanged when `list_element_name` is `None` or when
/// its datatype is not one of the list-like variants above.
pub fn with_list_element_name(field: Field, list_element_name: Option<&str>) -> Field {
    let element_name = match list_element_name {
        Some(element_name) => element_name,
        None => return field,
    };

    // Copies the existing child field and gives the copy the requested name.
    let renamed_child =
        |child: &Arc<Field>| Arc::new(child.as_ref().clone().with_name(element_name));

    // Build the replacement datatype first so the borrow of `field` ends
    // before the field itself is consumed below.
    let replacement = match field.data_type() {
        DataType::List(child) => Some(DataType::List(renamed_child(child))),
        DataType::LargeList(child) => Some(DataType::LargeList(renamed_child(child))),
        DataType::FixedSizeList(child, size) => {
            Some(DataType::FixedSizeList(renamed_child(child), *size))
        }
        _ => None,
    };

    match replacement {
        Some(data_type) => field.with_data_type(data_type),
        None => field,
    }
}

/// Merges `metadata` into the field's metadata map and returns the field.
///
/// An entry whose key already exists replaces the stored value. Entries are
/// applied in the order given, so a later duplicate key in `metadata` wins.
pub fn with_field_metadata(mut field: Field, metadata: Vec<(String, String)>) -> Field {
    field.metadata_mut().extend(metadata);
    field
}

/// Merges `metadata` into the element (child) field of a list-like field.
///
/// Applies to `DataType::List`, `DataType::LargeList` and
/// `DataType::FixedSizeList`. The field is returned unchanged when `metadata`
/// is empty or when its datatype is not one of those variants.
pub fn with_list_element_metadata(field: Field, metadata: Vec<(String, String)>) -> Field {
    if metadata.is_empty() {
        return field;
    }

    // Copies the existing child field and merges the entries into the copy.
    // The closure consumes `metadata`; at most one match arm below runs it.
    let annotated_child =
        |child: &Arc<Field>| Arc::new(with_field_metadata(child.as_ref().clone(), metadata));

    // Build the replacement datatype first so the borrow of `field` ends
    // before the field itself is consumed below.
    let replacement = match field.data_type() {
        DataType::List(child) => Some(DataType::List(annotated_child(child))),
        DataType::LargeList(child) => Some(DataType::LargeList(annotated_child(child))),
        DataType::FixedSizeList(child, size) => {
            Some(DataType::FixedSizeList(annotated_child(child), *size))
        }
        _ => None,
    };

    match replacement {
        Some(data_type) => field.with_data_type(data_type),
        None => field,
    }
}

/// Trait implemented by all types that can be used as an Arrow field.
///
/// Implementations are provided for types already supported by the arrow crate:
Expand Down
196 changes: 195 additions & 1 deletion arrow_convert/tests/test_schema.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use std::sync::Arc;

use arrow::datatypes::*;
use arrow_convert::{field::DEFAULT_FIELD_NAME, ArrowField};
use arrow_convert::{
field::{with_list_element_metadata, with_list_element_name, DEFAULT_FIELD_NAME},
ArrowField,
};
use pretty_assertions::assert_eq;

#[test]
Expand Down Expand Up @@ -253,3 +256,194 @@ fn test_large_string_schema() {
)))
);
}

/// Checks field naming when a container-level `rename_all` is combined with a
/// field-level `name` override and a raw-identifier field.
#[test]
fn test_field_name_override_with_rename_all() {
    #[derive(Debug, ArrowField)]
    #[allow(dead_code)]
    #[arrow_field(rename_all = "camelCase")]
    struct Root {
        plain_field: i32,
        #[arrow_field(name = "custom_name")]
        renamed_field: i32,
        r#type: i32,
    }

    let struct_fields = match <Root as arrow_convert::field::ArrowField>::data_type() {
        DataType::Struct(struct_fields) => struct_fields,
        _ => panic!("expected struct datatype"),
    };

    let mut observed_names = Vec::with_capacity(struct_fields.len());
    for struct_field in struct_fields.iter() {
        observed_names.push(struct_field.name().to_string());
    }

    // The explicit name is used verbatim; the raw identifier loses its `r#` prefix.
    assert_eq!(observed_names, vec!["plainField", "custom_name", "type"]);
}

/// Checks that `rename_all` works together with `type` overrides, an explicit
/// `name`, and `skip` on the same struct.
#[test]
fn test_rename_all_composes_with_type_name_and_skip() {
    #[derive(Debug, ArrowField)]
    #[allow(dead_code)]
    #[allow(non_snake_case)]
    #[arrow_field(rename_all = "snake_case")]
    struct Root {
        plainField: i32,
        #[arrow_field(type = "arrow_convert::field::LargeString")]
        optionalLabel: Option<String>,
        #[arrow_field(type = "arrow_convert::field::LargeVec<i64>", name = "custom_list")]
        ignoredNameByRenameAll: Vec<i64>,
        #[arrow_field(skip)]
        shouldSkip: i32,
    }

    let struct_fields = match <Root as arrow_convert::field::ArrowField>::data_type() {
        DataType::Struct(struct_fields) => struct_fields,
        _ => panic!("expected struct datatype"),
    };

    // Expected (name, datatype) per remaining field, in declaration order.
    let expected = [
        ("plain_field", DataType::Int32),
        ("optional_label", DataType::LargeUtf8),
        (
            "custom_list",
            DataType::LargeList(Arc::new(Field::new(
                DEFAULT_FIELD_NAME,
                DataType::Int64,
                false,
            ))),
        ),
    ];

    // The skipped field must not appear in the schema.
    assert_eq!(struct_fields.len(), 3);

    for (index, (expected_name, expected_type)) in expected.iter().enumerate() {
        let struct_field = &struct_fields[index];
        assert_eq!(struct_field.name().as_str(), *expected_name);
        assert_eq!(struct_field.data_type(), expected_type);
        assert!(!struct_field.is_nullable());
    }
}

/// Checks that a container-level `list_element_name` applies to list fields,
/// that a field-level value overrides it, and that non-list fields are untouched.
#[test]
fn test_list_element_name_container_and_field_override() {
    #[derive(Debug, ArrowField)]
    #[allow(dead_code)]
    #[arrow_field(list_element_name = "entry")]
    struct Root {
        numbers: Vec<i32>,
        #[arrow_field(list_element_name = "node")]
        labels: Vec<String>,
        scalar: i64,
    }

    let struct_fields = match <Root as arrow_convert::field::ArrowField>::data_type() {
        DataType::Struct(struct_fields) => struct_fields,
        _ => panic!("expected struct datatype"),
    };

    // Expected datatype per field, in declaration order.
    let expected_types = [
        DataType::List(Arc::new(Field::new("entry", DataType::Int32, false))),
        DataType::List(Arc::new(Field::new("node", DataType::Utf8, false))),
        DataType::Int64,
    ];

    for (index, expected_type) in expected_types.iter().enumerate() {
        assert_eq!(struct_fields[index].data_type(), expected_type);
    }
}

/// Checks field-level `metadata(...)` and container/field-level
/// `list_element_metadata(...)`, including `PARQUET::field_id` keys.
#[test]
fn test_metadata_support_for_parquet_field_id_keys() {
    #[derive(Debug, ArrowField)]
    #[allow(dead_code)]
    #[arrow_field(list_element_metadata(scope = "container", PARQUET::field_id = "101"))]
    struct Root {
        #[arrow_field(
            metadata(role = "top", PARQUET::field_id = "7"),
            list_element_metadata(scope = "field", level = "1", PARQUET::field_id = "9")
        )]
        bids: Vec<i64>,
        asks: Vec<i64>,
    }

    /// Looks up one metadata value on a field as a string slice.
    fn metadata_value<'a>(field: &'a Field, key: &str) -> Option<&'a str> {
        field.metadata().get(key).map(String::as_str)
    }

    let struct_fields = match <Root as arrow_convert::field::ArrowField>::data_type() {
        DataType::Struct(struct_fields) => struct_fields,
        _ => panic!("expected struct datatype"),
    };

    // `bids` has its own field metadata and its own list element metadata.
    let bids_field = &struct_fields[0];
    assert_eq!(metadata_value(bids_field, "role"), Some("top"));
    assert_eq!(metadata_value(bids_field, "PARQUET:field_id"), Some("7"));
    let bids_child = match bids_field.data_type() {
        DataType::List(bids_child) => bids_child,
        _ => panic!("expected list datatype for bids"),
    };
    assert_eq!(metadata_value(bids_child, "scope"), Some("field"));
    assert_eq!(metadata_value(bids_child, "level"), Some("1"));
    assert_eq!(metadata_value(bids_child, "PARQUET:field_id"), Some("9"));

    // `asks` has no field-level attributes, so only container defaults apply.
    let asks_field = &struct_fields[1];
    assert_eq!(metadata_value(asks_field, "role"), None);
    let asks_child = match asks_field.data_type() {
        DataType::List(asks_child) => asks_child,
        _ => panic!("expected list datatype for asks"),
    };
    assert_eq!(metadata_value(asks_child, "scope"), Some("container"));
    assert_eq!(metadata_value(asks_child, "PARQUET:field_id"), Some("101"));
}

/// Checks that a field-level list element metadata entry overrides a key that
/// is declared more than once at container level, while other container keys
/// are kept.
#[test]
fn test_list_element_metadata_field_override_wins_with_duplicate_container_keys() {
    #[derive(Debug, ArrowField)]
    #[allow(dead_code)]
    #[arrow_field(
        list_element_metadata(scope = "container_a"),
        list_element_metadata(scope = "container_b", keep = "container")
    )]
    struct Root {
        #[arrow_field(list_element_metadata(scope = "field"))]
        levels: Vec<i64>,
    }

    /// Looks up one metadata value on a field as a string slice.
    fn metadata_value<'a>(field: &'a Field, key: &str) -> Option<&'a str> {
        field.metadata().get(key).map(String::as_str)
    }

    let struct_fields = match <Root as arrow_convert::field::ArrowField>::data_type() {
        DataType::Struct(struct_fields) => struct_fields,
        _ => panic!("expected struct datatype"),
    };
    let levels_child = match struct_fields[0].data_type() {
        DataType::List(levels_child) => levels_child,
        _ => panic!("expected list datatype"),
    };

    // The field-level value replaces both container-level `scope` values.
    assert_eq!(metadata_value(levels_child, "scope"), Some("field"));
    // A container key with no field-level counterpart survives.
    assert_eq!(metadata_value(levels_child, "keep"), Some("container"));
}

/// Exercises `with_list_element_name` and `with_list_element_metadata`
/// directly on `LargeList` and `FixedSizeList` fields.
#[test]
fn test_with_list_element_helpers_for_large_and_fixed_size_lists() {
    // LargeList: rename the child, then attach metadata to it.
    let large_list = Field::new(
        "values",
        DataType::LargeList(Arc::new(Field::new("_item", DataType::Int64, false))),
        false,
    );
    let large_list = with_list_element_metadata(
        with_list_element_name(large_list, Some("element")),
        vec![(String::from("scope"), String::from("book"))],
    );
    let large_child = match large_list.data_type() {
        DataType::LargeList(large_child) => large_child,
        _ => panic!("expected LargeList"),
    };
    assert_eq!(large_child.name(), "element");
    assert_eq!(
        large_child.metadata().get("scope").map(String::as_str),
        Some("book")
    );

    // FixedSizeList: same steps; the list size must be carried through.
    let fixed_list = Field::new(
        "values",
        DataType::FixedSizeList(Arc::new(Field::new("_item", DataType::Int64, false)), 3),
        false,
    );
    let fixed_list = with_list_element_metadata(
        with_list_element_name(fixed_list, Some("level")),
        vec![(String::from("kind"), String::from("depth"))],
    );
    let (fixed_child, fixed_len) = match fixed_list.data_type() {
        DataType::FixedSizeList(fixed_child, fixed_len) => (fixed_child, fixed_len),
        _ => panic!("expected FixedSizeList"),
    };
    assert_eq!(*fixed_len, 3);
    assert_eq!(fixed_child.name(), "level");
    assert_eq!(
        fixed_child.metadata().get("kind").map(String::as_str),
        Some("depth")
    );
}
9 changes: 9 additions & 0 deletions arrow_convert/tests/ui/enum_unexpected_mode_value.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use arrow_convert::ArrowField;

#[derive(ArrowField)]
#[arrow_field(type = "invalid_mode")]
enum BadMode {
A,
}

fn main() {}
5 changes: 5 additions & 0 deletions arrow_convert/tests/ui/enum_unexpected_mode_value.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
error: Unexpected value for mode
--> tests/ui/enum_unexpected_mode_value.rs:4:15
|
4 | #[arrow_field(type = "invalid_mode")]
| ^^^^
9 changes: 9 additions & 0 deletions arrow_convert/tests/ui/struct_metadata_value_not_string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use arrow_convert::ArrowField;

#[derive(ArrowField)]
struct MetadataNotString {
#[arrow_field(metadata(role = 1))]
value: i64,
}

fn main() {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
error: Expected string value for metadata entry
--> tests/ui/struct_metadata_value_not_string.rs:5:28
|
5 | #[arrow_field(metadata(role = 1))]
| ^^^^
9 changes: 9 additions & 0 deletions arrow_convert/tests/ui/struct_rename_all_not_string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use arrow_convert::ArrowField;

#[derive(ArrowField)]
#[arrow_field(rename_all = 42)]
struct RenameAllNotString {
value: i64,
}

fn main() {}
5 changes: 5 additions & 0 deletions arrow_convert/tests/ui/struct_rename_all_not_string.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
error: Unexpected value for rename_all
--> tests/ui/struct_rename_all_not_string.rs:4:15
|
4 | #[arrow_field(rename_all = 42)]
| ^^^^^^^^^^
Loading
Loading