From 53f915f1285b9f358deaf4d8440e7e5fe6568750 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 12 Apr 2025 13:36:00 +0200 Subject: [PATCH 1/6] Use ruff for formatting --- x.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/x.py b/x.py index 7ceb9e5..cd1af8e 100644 --- a/x.py +++ b/x.py @@ -7,6 +7,7 @@ all_arrow_features = [ # arrow-version:insert: "arrow-{version}", + "arrow-55", "arrow-54", "arrow-53", "arrow-52", @@ -145,7 +146,7 @@ def _workflow_check_steps(): @cmd(help="Format the code") def format(): - _sh(f"{python} -m black {_q(__file__)}") + _sh(f"uv run --with 'ruff==0.11.5' ruff format {_q(__file__)}") _sh("cargo fmt") # the impl files are not found by cargo fmt @@ -201,8 +202,8 @@ def doc(private=False, open=False): f""" cargo doc --features {default_features} - {'--document-private-items' if private else ''} - {'--open' if open else ''} + {"--document-private-items" if private else ""} + {"--open" if open else ""} """, ) From 0318a55db429f844c6cfea2618d2aeb29f8efa5c Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 12 Apr 2025 13:39:24 +0200 Subject: [PATCH 2/6] Add arrow 55 --- .github/workflows/release.yml | 4 +++ .github/workflows/test.yml | 4 +++ Cargo.lock | 66 +++++++++++++++++++++++++++++++++-- marrow/Cargo.toml | 9 +++-- marrow/src/datatypes.rs | 2 +- marrow/src/impl_arrow/mod.rs | 5 +++ marrow/src/lib.rs | 1 + test_with_arrow/Cargo.toml | 3 ++ test_with_arrow/src/lib.rs | 1 + x.py | 1 + 10 files changed, 90 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c9229d2..d3aa027 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -52,6 +52,10 @@ "name": "Check arrow2-0-16", "run": "cargo check -p marrow --features arrow2-0-16" }, + { + "name": "Check arrow-55", + "run": "cargo check -p marrow --features arrow-55" + }, { "name": "Check arrow-54", "run": "cargo check -p marrow --features arrow-54" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 453bba9..f6eec3b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -58,6 +58,10 @@ "name": "Check arrow2-0-16", "run": "cargo check -p marrow --features arrow2-0-16" }, + { + "name": "Check arrow-55", + "run": "cargo check -p marrow --features arrow-55" + }, { "name": "Check arrow-54", "run": "cargo check -p marrow --features arrow-54" diff --git a/Cargo.lock b/Cargo.lock index 19c1897..77e574b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,22 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-array" +version = "55.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cebfe926794fbc1f49ddd0cdaf898956ca9f6e79541efce62dabccfd81380472" +dependencies = [ + "ahash", + "arrow-buffer 55.0.0", + "arrow-data 55.0.0", + "arrow-schema 55.0.0", + "chrono", + "half", + "hashbrown 0.15.2", + "num", +] + [[package]] name = "arrow-buffer" version = "37.0.0" @@ -509,6 +525,17 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "55.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0303c7ec4cf1a2c60310fc4d6bbc3350cd051a17bf9e9c0a8e47b4db79277824" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-data" version = "37.0.0" @@ -725,6 +752,18 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-data" +version = "55.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8affacf3351a24039ea24adab06f316ded523b6f8c3dbe28fbac5f18743451b" +dependencies = [ + "arrow-buffer 55.0.0", + "arrow-schema 55.0.0", + "half", + "num", +] + [[package]] name = "arrow-schema" version = "37.0.0" @@ -887,6 +926,15 @@ dependencies = [ "serde", ] +[[package]] +name = "arrow-schema" +version = "55.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7450c76ab7c5a6805be3440dc2e2096010da58f7cab301fdc996a4ee3ee74e49" +dependencies = [ + "serde", +] + [[package]] name = "arrow2" version = "0.16.0" @@ -982,14 +1030,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets", + "windows-link", ] [[package]] @@ -1175,6 +1223,7 @@ dependencies = [ "arrow-array 52.2.0", "arrow-array 53.3.0", "arrow-array 54.0.0", + "arrow-array 55.0.0", "arrow-buffer 37.0.0", "arrow-buffer 38.0.0", "arrow-buffer 39.0.0", @@ -1193,6 +1242,7 @@ dependencies = [ "arrow-buffer 52.2.0", "arrow-buffer 53.3.0", "arrow-buffer 54.0.0", + "arrow-buffer 55.0.0", "arrow-data 37.0.0", "arrow-data 38.0.0", "arrow-data 39.0.0", @@ -1211,6 +1261,7 @@ dependencies = [ "arrow-data 52.2.0", "arrow-data 53.3.0", "arrow-data 54.0.0", + "arrow-data 55.0.0", "arrow-schema 37.0.0", "arrow-schema 38.0.0", "arrow-schema 39.0.0", @@ -1229,6 +1280,7 @@ dependencies = [ "arrow-schema 52.2.0", "arrow-schema 53.3.0", "arrow-schema 54.0.0", + "arrow-schema 55.0.0", "arrow2 0.16.0", "arrow2 0.17.4", "bytemuck", @@ -1447,6 +1499,7 @@ dependencies = [ "arrow-array 52.2.0", "arrow-array 53.3.0", "arrow-array 54.0.0", + "arrow-array 55.0.0", "arrow-schema 37.0.0", "arrow-schema 38.0.0", "arrow-schema 39.0.0", @@ -1465,6 +1518,7 @@ dependencies = [ "arrow-schema 52.2.0", "arrow-schema 53.3.0", "arrow-schema 54.0.0", + "arrow-schema 55.0.0", "chrono", "half", "marrow", @@ -1562,6 +1616,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-targets" version = "0.52.6" diff --git a/marrow/Cargo.toml b/marrow/Cargo.toml index 95b51d3..bb60248 100644 --- a/marrow/Cargo.toml +++ b/marrow/Cargo.toml @@ -13,7 +13,7 @@ rust-version = "1.70.0" [package.metadata.docs.rs] # arrow-version:replace: features = ["arrow2-0-17", "arrow-{version}", "serde"] -features = ["arrow2-0-17", "arrow-54", "serde"] +features = ["arrow2-0-17", "arrow-55", "serde"] [features] default = [] @@ -23,6 +23,7 @@ serde = ["dep:serde"] # support for different arrow versions # arrow-version:insert: arrow-{version} = ["dep:arrow-array-{version}", "dep:arrow-schema-{version}", "dep:arrow-data-{version}", "dep:arrow-buffer-{version}"] +arrow-55 = ["dep:arrow-array-55", "dep:arrow-schema-55", "dep:arrow-data-55", "dep:arrow-buffer-55"] arrow-54 = ["dep:arrow-array-54", "dep:arrow-schema-54", "dep:arrow-data-54", "dep:arrow-buffer-54"] arrow-53 = ["dep:arrow-array-53", "dep:arrow-schema-53", "dep:arrow-data-53", "dep:arrow-buffer-53"] arrow-52 = ["dep:arrow-array-52", "dep:arrow-schema-52", "dep:arrow-data-52", "dep:arrow-buffer-52"] @@ -53,6 +54,7 @@ half = { version = "2", default-features = false } serde = { version = "1.0", default-features = false, features = ["std", "derive"], optional = true } # arrow-version:insert: arrow-array-{version} = {{ package = "arrow-array", version = "{version}", optional = true, default-features = false }} +arrow-array-55 = { package = "arrow-array", version = "55", optional = true, default-features = false } arrow-array-54 = { package = "arrow-array", version = "54", optional = true, default-features = false } arrow-array-53 = { package = "arrow-array", version = "53", optional = true, default-features = false } arrow-array-52 = { package = "arrow-array", version = "52", optional = true, default-features = false } @@ -73,6 +75,7 @@ arrow-array-38 = { package = "arrow-array", version = "38", optional = true, def arrow-array-37 = { package = "arrow-array", version = "37", optional = true, default-features = false } # arrow-version:insert: arrow-buffer-{version} = {{ package = "arrow-buffer", version = "{version}", optional = true, default-features = false }} +arrow-buffer-55 = { package = "arrow-buffer", version = "55", optional = true, default-features = false } arrow-buffer-54 = { package = "arrow-buffer", version = "54", optional = true, default-features = false } arrow-buffer-53 = { package = "arrow-buffer", version = "53", optional = true, default-features = false } arrow-buffer-52 = { package = "arrow-buffer", version = "52", optional = true, default-features = false } @@ -93,6 +96,7 @@ arrow-buffer-38 = { package = "arrow-buffer", version = "38", optional = true, d arrow-buffer-37 = { package = "arrow-buffer", version = "37", optional = true, default-features = false } # arrow-version:insert: arrow-data-{version} = {{ package = "arrow-data", version="{version}", optional = true, default-features = false }} +arrow-data-55 = { package = "arrow-data", version="55", optional = true, default-features = false } arrow-data-54 = { package = "arrow-data", version="54", optional = true, default-features = false } arrow-data-53 = { package = "arrow-data", version="53", optional = true, default-features = false } arrow-data-52 = { package = "arrow-data", version="52", optional = true, default-features = false } @@ -113,6 +117,7 @@ arrow-data-38 = { package = "arrow-data", version="38", optional = true, default arrow-data-37 = { package = "arrow-data", version="37", optional = true, default-features = false } # arrow-version:insert: arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", optional = true, default-features = false }} +arrow-schema-55 = { package = "arrow-schema", version = "55", optional = true, default-features = false } arrow-schema-54 = { package = "arrow-schema", version = "54", optional = true, default-features = false } arrow-schema-53 = { package = "arrow-schema", version = "53", optional = true, default-features = false } arrow-schema-52 = { package = "arrow-schema", version = "52", optional = true, default-features = false } @@ -133,4 +138,4 @@ arrow-schema-38 = { package = "arrow-schema", version = "38", optional = true, d arrow-schema-37 = { package = "arrow-schema", version = "37", optional = true, default-features = false } arrow2-0-17 = { package = "arrow2", version = "0.17", optional = true, default-features = false } -arrow2-0-16 = { package = "arrow2", version = "0.16", optional = true, default-features = false } +arrow2-0-16 = { package = "arrow2", version = "0.16", optional = true, default-features = false } \ No newline at end of file diff --git a/marrow/src/datatypes.rs b/marrow/src/datatypes.rs index b5e5bfd..b2cffb1 100644 --- a/marrow/src/datatypes.rs +++ b/marrow/src/datatypes.rs @@ -214,7 +214,7 @@ pub enum DataType { Binary, /// Byte arrays stored with `i64` offsets LargeBinary, - /// Bytes stored with ì32` offsets or inline for small values + /// Bytes stored with `u32` offsets or inline for small values BinaryView, /// Byte arrays with fixed length FixedSizeBinary(i32), diff --git a/marrow/src/impl_arrow/mod.rs b/marrow/src/impl_arrow/mod.rs index 733d58a..036cecb 100644 --- a/marrow/src/impl_arrow/mod.rs +++ b/marrow/src/impl_arrow/mod.rs @@ -2,6 +2,11 @@ #![cfg_attr(any(), rustfmt::skip)] // arrow-version:insert: #[cfg(feature = "arrow-{version}")]{\n}mod arrow_{version} {{{\n} use {{arrow_array_{version} as arrow_array, arrow_buffer_{version} as arrow_buffer, arrow_data_{version} as arrow_data, arrow_schema_{version} as arrow_schema}};{\n} include!("impl_api_53.rs");{\n}}} +#[cfg(feature = "arrow-55")] +mod arrow_55 { + use {arrow_array_55 as arrow_array, arrow_buffer_55 as arrow_buffer, arrow_data_55 as arrow_data, arrow_schema_55 as arrow_schema}; + include!("impl_api_53.rs"); +} #[cfg(feature = "arrow-54")] mod arrow_54 { use {arrow_array_54 as arrow_array, arrow_buffer_54 as arrow_buffer, arrow_data_54 as arrow_data, arrow_schema_54 as arrow_schema}; diff --git a/marrow/src/lib.rs b/marrow/src/lib.rs index 8b3c45e..35c4f6b 100644 --- a/marrow/src/lib.rs +++ b/marrow/src/lib.rs @@ -99,6 +99,7 @@ //! | Feature | Arrow Version | //! |---------------|---------------| // arrow-version:insert: //! | `arrow-{version}` | `arrow={version}` | +//! | `arrow-55` | `arrow=55` | //! | `arrow-54` | `arrow=54` | //! | `arrow-53` | `arrow=53` | //! | `arrow-52` | `arrow=52` | diff --git a/test_with_arrow/Cargo.toml b/test_with_arrow/Cargo.toml index e20dfb5..fa25af6 100644 --- a/test_with_arrow/Cargo.toml +++ b/test_with_arrow/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" [features] # arrow-version:insert: arrow-{version} = ["marrow/arrow-{version}", "dep:arrow-array-{version}", "dep:arrow-schema-{version}"] +arrow-55 = ["marrow/arrow-55", "dep:arrow-array-55", "dep:arrow-schema-55"] arrow-54 = ["marrow/arrow-54", "dep:arrow-array-54", "dep:arrow-schema-54"] arrow-53 = ["marrow/arrow-53", "dep:arrow-array-53", "dep:arrow-schema-53"] arrow-52 = ["marrow/arrow-52", "dep:arrow-array-52", "dep:arrow-schema-52"] @@ -36,6 +37,7 @@ serde_json = "1" chrono = { version = "0.4", default-features = false } # arrow-version:insert: arrow-array-{version} = {{ package = "arrow-array", version="{version}", optional = true, default-features = false }} +arrow-array-55 = { package = "arrow-array", version="55", optional = true, default-features = false } arrow-array-54 = { package = "arrow-array", version="54", optional = true, default-features = false } arrow-array-53 = { package = "arrow-array", version = "53.3.0", optional = true, default-features = false } arrow-array-52 = { package = "arrow-array", version = "52", optional = true, default-features = false } @@ -56,6 +58,7 @@ arrow-array-38 = { package = "arrow-array", version="38", optional = true, defau arrow-array-37 = { package = "arrow-array", version="37", optional = true, default-features = false } # arrow-version:insert: arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", optional = true, default-features = false, features = ["serde"] }} +arrow-schema-55 = { package = "arrow-schema", version = "55", optional = true, default-features = false, features = ["serde"] } arrow-schema-54 = { package = "arrow-schema", version = "54", optional = true, default-features = false, features = ["serde"] } arrow-schema-53 = { package = "arrow-schema", version = "53.3.0", optional = true, default-features = false, features = ["serde"] } arrow-schema-52 = { package = "arrow-schema", version = "52", optional = true, default-features = false, features = ["serde"] } diff --git a/test_with_arrow/src/lib.rs b/test_with_arrow/src/lib.rs index 0d2179c..c395bfd 100644 --- a/test_with_arrow/src/lib.rs +++ b/test_with_arrow/src/lib.rs @@ -17,6 +17,7 @@ macro_rules! define_test_module { } // arrow-version:insert: define_test_module!("arrow-{version}", arrow_{version}, arrow_array_{version}, arrow_schema_{version}, utils, arrays, data_types,struct_arrays, fixed_size_binary_arrays, intervals, union_arrays, views); +define_test_module!("arrow-55", arrow_55, arrow_array_55, arrow_schema_55, utils, arrays, data_types,struct_arrays, fixed_size_binary_arrays, intervals, union_arrays, views); define_test_module!("arrow-54", arrow_54, arrow_array_54, arrow_schema_54, utils, arrays, data_types,struct_arrays, fixed_size_binary_arrays, intervals, union_arrays, views); define_test_module!("arrow-53", arrow_53, arrow_array_53, arrow_schema_53, utils, arrays, data_types,struct_arrays, fixed_size_binary_arrays, intervals, union_arrays, views); define_test_module!("arrow-52", arrow_52, arrow_array_52, arrow_schema_52, utils, arrays, data_types,struct_arrays, fixed_size_binary_arrays, intervals, union_arrays); diff --git a/x.py b/x.py index cd1af8e..331b51a 100644 --- a/x.py +++ b/x.py @@ -8,6 +8,7 @@ all_arrow_features = [ # arrow-version:insert: "arrow-{version}", "arrow-55", + "arrow-55", "arrow-54", "arrow-53", "arrow-52", From d5f5f321a40187a3885fb2a53e7f45a5490b69a7 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 12 Apr 2025 13:40:00 +0200 Subject: [PATCH 3/6] Update changelog --- Changes.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Changes.md b/Changes.md index 3d3d63c..33bb5e7 100644 --- a/Changes.md +++ b/Changes.md @@ -1,5 +1,9 @@ # Change log +## 0.2.3 + +- Add `arrow=55` support + ## 0.2.2 - Add helpers to work with bit arrays From a90412800e7fe86084e7f6a7d96e2762a6d9da06 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 12 Apr 2025 13:43:12 +0200 Subject: [PATCH 4/6] Update array docs --- marrow/src/array.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/marrow/src/array.rs b/marrow/src/array.rs index 7f79908..0a0fb8a 100644 --- a/marrow/src/array.rs +++ b/marrow/src/array.rs @@ -79,11 +79,12 @@ pub enum Array { /// /// Interval arrays are not supported for `arrow2`. MonthDayNanoInterval(PrimitiveArray), - /// A `[u8]` array with `i32` offsets of strings + /// A `[u8]` array with `i32` offsets interpreted as strings Utf8(BytesArray), - /// A `[u8]` array with `i64` offsets of strings + /// A `[u8]` array with `i64` offsets interpreted as strings LargeUtf8(BytesArray), - /// TODO + /// A `[u8]` array interpreted as strings with support for small inlined slices and references + /// to external buffers Utf8View(BytesViewArray), /// A `[u8]` array with `i32` offsets Binary(BytesArray), @@ -91,7 +92,7 @@ pub enum Array { LargeBinary(BytesArray), /// A `[u8; N]` array with `i32` offsets FixedSizeBinary(FixedSizeBinaryArray), - /// TODO + /// A `[u8]` array with support for small inlined slices and references to external buffers BinaryView(BytesViewArray), /// An `i128` array of decimals Decimal128(DecimalArray), From 25135b299e745256a5d4879da637e6b7be1a18fa Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 12 Apr 2025 13:47:05 +0200 Subject: [PATCH 5/6] Add docs for map DataType --- marrow/src/datatypes.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/marrow/src/datatypes.rs b/marrow/src/datatypes.rs index b2cffb1..aa11b1d 100644 --- a/marrow/src/datatypes.rs +++ b/marrow/src/datatypes.rs @@ -243,6 +243,8 @@ pub enum DataType { /// Lists with a fixed number of element with `i32` offsets FixedSizeList(Box, i32), /// Maps + /// + /// The field should be a struct field with two children for the keys and values. Map(Box, bool), /// Deduplicated values /// From 86f20bf3e0397f4d587479a504b3948ac082c0d8 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 12 Apr 2025 13:49:38 +0200 Subject: [PATCH 6/6] Bump version --- Cargo.lock | 2 +- marrow/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 77e574b..9101ba3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1203,7 +1203,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "marrow" -version = "0.2.2" +version = "0.2.3" dependencies = [ "arrow-array 37.0.0", "arrow-array 38.0.0", diff --git a/marrow/Cargo.toml b/marrow/Cargo.toml index bb60248..10d139e 100644 --- a/marrow/Cargo.toml +++ b/marrow/Cargo.toml @@ -2,7 +2,7 @@ edition = "2021" name = "marrow" -version = "0.2.2" +version = "0.2.3" authors = ["Christopher Prohm "] description = "Minimalist Arrow interop" readme = "../Readme.md"