diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml index f2653ec4e46e..e10b69eaf4aa 100644 --- a/arrow-json/Cargo.toml +++ b/arrow-json/Cargo.toml @@ -35,12 +35,18 @@ bench = false [package.metadata.docs.rs] all-features = true +[features] +default = [] +variant_experimental = [] + [dependencies] arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-cast = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } +parquet-variant-compute = { workspace = true } +parquet-variant = { workspace = true } half = { version = "2.1", default-features = false } indexmap = { version = "2.0", default-features = false, features = ["std"] } num-traits = { version = "0.2.19", default-features = false, features = ["std"] } diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs index e74fef79178a..4c9a4e13bef9 100644 --- a/arrow-json/src/reader/list_array.rs +++ b/arrow-json/src/reader/list_array.rs @@ -46,6 +46,7 @@ impl ListArrayDecoder { _ => unreachable!(), }; let decoder = make_decoder( + Some(field.clone()), field.data_type().clone(), coerce_primitive, strict_mode, diff --git a/arrow-json/src/reader/map_array.rs b/arrow-json/src/reader/map_array.rs index c2068577a094..ef8e841ef103 100644 --- a/arrow-json/src/reader/map_array.rs +++ b/arrow-json/src/reader/map_array.rs @@ -57,6 +57,7 @@ impl MapArrayDecoder { }; let keys = make_decoder( + Some(fields[0].clone()), fields[0].data_type().clone(), coerce_primitive, strict_mode, @@ -64,6 +65,7 @@ impl MapArrayDecoder { struct_mode, )?; let values = make_decoder( + Some(fields[1].clone()), fields[1].data_type().clone(), coerce_primitive, strict_mode, diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index f5fd1a8e7c38..43929eb4a3c9 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -141,6 +141,8 @@ use std::io::BufRead; use std::sync::Arc; use chrono::Utc; +#[cfg(feature = "variant_experimental")] +use parquet_variant_compute::VariantType; use serde_core::Serialize; use arrow_array::timezone::Tz; @@ -162,6 +164,9 @@ use crate::reader::struct_array::StructArrayDecoder; use crate::reader::tape::{Tape, TapeDecoder}; use crate::reader::timestamp_array::TimestampArrayDecoder; +#[cfg(feature = "variant_experimental")] +use crate::reader::variant_array::VariantArrayDecoder; + mod binary_array; mod boolean_array; mod decimal_array; @@ -176,6 +181,8 @@ mod string_view_array; mod struct_array; mod tape; mod timestamp_array; +#[cfg(feature = "variant_experimental")] +mod variant_array; /// A builder for [`Reader`] and [`Decoder`] pub struct ReaderBuilder { @@ -304,6 +311,7 @@ impl ReaderBuilder { }; let decoder = make_decoder( + None, data_type, self.coerce_primitive, self.strict_mode, @@ -686,12 +694,18 @@ macro_rules! primitive_decoder { } fn make_decoder( + field: Option, data_type: DataType, coerce_primitive: bool, strict_mode: bool, is_nullable: bool, struct_mode: StructMode, ) -> Result, ArrowError> { + #[cfg(feature = "variant_experimental")] + if let Some(field) = field && field.try_extension_type::().is_ok() { + return Ok(Box::new(VariantArrayDecoder{})); + } + downcast_integer! { data_type => (primitive_decoder, data_type), DataType::Null => Ok(Box::::default()), @@ -758,6 +772,7 @@ fn make_decoder( #[cfg(test)] mod tests { + use parquet_variant::VariantBuilder; use serde_json::json; use std::fs::File; use std::io::{BufReader, Cursor, Seek}; @@ -2771,6 +2786,106 @@ mod tests { ); } + #[cfg(feature = "variant_experimental")] + #[test] + fn test_variant() { + use parquet_variant::Variant; + use parquet_variant_compute::VariantArrayBuilder; + + let do_test = |json_input: &str, ids: Vec, variants: Vec| { + let variant_array = VariantArrayBuilder::new(0).build(); + + let struct_field = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + // call VariantArray::field to get the correct Field + variant_array.field("var"), + ]); + + let builder = ReaderBuilder::new(Arc::new(struct_field.clone())); + let result = builder + .with_struct_mode(StructMode::ObjectOnly) + .build(Cursor::new(json_input.as_bytes())) + .unwrap() + .next() + .unwrap() + .unwrap(); + + let int_array = arrow_array::array::Int32Array::from(ids); + + let variant_array = { + let mut variant_builder = VariantArrayBuilder::new(variants.len()); + for v in variants { + variant_builder.append_variant(v); + } + variant_builder.build() + }; + + let variant_struct_array: StructArray = variant_array.into(); + + let expected = RecordBatch::try_new( + struct_field.into(), + vec![Arc::new(int_array), Arc::new(variant_struct_array)], + ) + .unwrap(); + + assert_eq!(result, expected); + }; + + do_test( + "{\"id\": 1, \"var\": \"a\"}\n{\"id\": 2, \"var\": \"b\"}", + vec![1, 2], + vec![Variant::from("a"), Variant::from("b")], + ); + + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + object_builder.insert("int64", Variant::Int64(1)); + object_builder.insert("double", Variant::Double(1.0)); + object_builder.insert("null", Variant::Null); + object_builder.insert("true", Variant::BooleanTrue); + object_builder.insert("false", Variant::BooleanFalse); + object_builder.insert("string", Variant::from("a")); + object_builder.finish(); + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + + do_test( + "{\"id\": 1, \"var\": {\"int64\": 1, \"double\": 1.0, \"null\": null, \"true\": true, \"false\": false, \"string\": \"a\"}}", + vec![1], + vec![variant], + ); + + // nested structs + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + { + let mut list_builder = object_builder.new_list("somelist"); + { + let mut nested_object_builder = list_builder.new_object(); + nested_object_builder.insert("num", Variant::Int64(2)); + nested_object_builder.finish(); + } + { + let mut nested_object_builder = list_builder.new_object(); + nested_object_builder.insert("num", Variant::Int64(3)); + nested_object_builder.finish(); + } + list_builder.finish(); + object_builder.insert("scalar", Variant::from("a")); + } + object_builder.finish(); + + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + + do_test( + "{\"id\": 1, \"var\": {\"somelist\": [{\"num\": 2}, {\"num\": 3}], \"scalar\": \"a\"}}", + vec![1], + vec![variant], + ); + } + + #[test] fn test_decode_list_struct_with_wrong_types() { let int_field = Field::new("a", DataType::Int32, true); diff --git a/arrow-json/src/reader/struct_array.rs b/arrow-json/src/reader/struct_array.rs index 262097ace396..916393985aba 100644 --- a/arrow-json/src/reader/struct_array.rs +++ b/arrow-json/src/reader/struct_array.rs @@ -46,6 +46,7 @@ impl StructArrayDecoder { // it doesn't contain any nulls not masked by its parent let nullable = f.is_nullable() || is_nullable; make_decoder( + Some(f.clone()), f.data_type().clone(), coerce_primitive, strict_mode, diff --git a/arrow-json/src/reader/variant_array.rs b/arrow-json/src/reader/variant_array.rs new file mode 100644 index 000000000000..725240d7a870 --- /dev/null +++ b/arrow-json/src/reader/variant_array.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{Array, StructArray}; +use parquet_variant::{ObjectFieldBuilder, Variant, VariantBuilder, VariantBuilderExt}; +use parquet_variant_compute::VariantArrayBuilder; +use arrow_data::ArrayData; +use arrow_schema::ArrowError; + +use crate::reader::ArrayDecoder; +use crate::reader::tape::{Tape, TapeElement}; + +#[derive(Default)] +pub struct VariantArrayDecoder {} + +impl ArrayDecoder for VariantArrayDecoder { + fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result { + let mut array_builder = VariantArrayBuilder::new(pos.len()); + for p in pos { + let mut builder = VariantBuilder::new(); + variant_from_tape_element(&mut builder, *p, tape)?; + let (metadata, value) = builder.finish(); + array_builder.append_value(Variant::new(&metadata, &value)); + } + let variant_struct_array: StructArray = array_builder.build().into(); + Ok(variant_struct_array.into_data()) + } +} + +fn variant_from_tape_element(builder: &mut impl VariantBuilderExt, mut p: u32, tape: &Tape) -> Result { + match tape.get(p) { + TapeElement::StartObject(end_idx) => { + let mut object_builder = builder.try_new_object()?; + p += 1; + while p < end_idx { + // Read field name + let field_name = match tape.get(p) { + TapeElement::String(s) => tape.get_string(s), + _ => return Err(tape.error(p, "field name")), + }; + + let mut field_builder = ObjectFieldBuilder::new(field_name, &mut object_builder); + p = tape.next(p, "field value")?; + p = variant_from_tape_element(&mut field_builder, p, tape)?; + } + object_builder.finish(); + } + TapeElement::EndObject(_u32) => unreachable!(), + TapeElement::StartList(end_idx) => { + let mut list_builder = builder.try_new_list()?; + p+= 1; + while p < end_idx { + p = variant_from_tape_element(&mut list_builder, p, tape)?; + } + list_builder.finish(); + } + TapeElement::EndList(_u32) => unreachable!(), + TapeElement::String(idx) => builder.append_value(tape.get_string(idx)), + TapeElement::Number(idx) => { + let s = tape.get_string(idx); + builder.append_value(parse_number(s)?) + }, + TapeElement::I64(i) => builder.append_value(i), + TapeElement::I32(i) => builder.append_value(i), + TapeElement::F64(f) => builder.append_value(f), + TapeElement::F32(f) => builder.append_value(f), + TapeElement::True => builder.append_value(true), + TapeElement::False => builder.append_value(false), + TapeElement::Null => builder.append_value(Variant::Null), + } + p += 1; + Ok(p) +} + +fn parse_number<'a, 'b>(s: &'a str) -> Result, ArrowError> { + match lexical_core::parse::(s.as_bytes()) { + Ok(v) => Ok(Variant::from(v)), + Err(_) => { + match lexical_core::parse::(s.as_bytes()) { + Ok(v) => Ok(Variant::from(v)), + Err(_) => Err(ArrowError::JsonError(format!("failed to parse {s} as number"))), + } + } + } +} \ No newline at end of file diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 4200cd7a6c78..92b47388c6bc 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -81,6 +81,7 @@ force_validate = ["arrow-array/force_validate", "arrow-data/force_validate"] ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"] chrono-tz = ["arrow-array/chrono-tz"] canonical_extension_types = ["arrow-schema/canonical_extension_types"] +variant_experimental = ["arrow-json?/variant_experimental"] [dev-dependencies] chrono = { workspace = true } diff --git a/parquet-variant-compute/Cargo.toml b/parquet-variant-compute/Cargo.toml index 74c3dd3fb72f..4f33b08b9471 100644 --- a/parquet-variant-compute/Cargo.toml +++ b/parquet-variant-compute/Cargo.toml @@ -29,8 +29,10 @@ rust-version = { workspace = true } [dependencies] -arrow = { workspace = true , features = ["canonical_extension_types"]} -arrow-schema = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-cast = { workspace = true } +arrow-schema = { workspace = true, features = ["canonical_extension_types"] } half = { version = "2.1", default-features = false } indexmap = "2.10.0" parquet-variant = { workspace = true } diff --git a/parquet-variant-compute/src/arrow_to_variant.rs b/parquet-variant-compute/src/arrow_to_variant.rs index 5e01aba3c1a1..e38a9211a36e 100644 --- a/parquet-variant-compute/src/arrow_to_variant.rs +++ b/parquet-variant-compute/src/arrow_to_variant.rs @@ -16,16 +16,17 @@ // under the License. use crate::type_conversion::CastOptions; -use arrow::array::{ - Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray, +use arrow_array::{ + Array, cast::AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray, }; -use arrow::compute::kernels::cast; -use arrow::datatypes::{ - self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType, +use arrow_cast::cast; +use arrow_array::types::{ + self as datatypes, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType, DecimalType, RunEndIndexType, }; -use arrow::temporal_conversions::{as_date, as_datetime, as_time}; +use arrow_buffer::ArrowNativeType; +use arrow_array::temporal_conversions::{as_date, as_datetime, as_time}; use arrow_schema::{ArrowError, DataType, TimeUnit}; use chrono::{DateTime, TimeZone, Utc}; use parquet_variant::{ @@ -390,7 +391,7 @@ macro_rules! define_row_builder { define_row_builder!( struct BooleanArrowToVariantBuilder<'a>, - |array| -> arrow::array::BooleanArray { array.as_boolean() } + |array| -> arrow_array::BooleanArray { array.as_boolean() } ); define_row_builder!( @@ -417,7 +418,7 @@ define_row_builder!( options: &'a CastOptions, scale: i8, }, - |array| -> arrow::array::Decimal256Array { array.as_primitive() }, + |array| -> arrow_array::Decimal256Array { array.as_primitive() }, |value| -> Option<_> { let value = value.to_i128(); value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok()) @@ -480,12 +481,12 @@ define_row_builder!( define_row_builder!( struct BinaryViewArrowToVariantBuilder<'a>, - |array| -> arrow::array::BinaryViewArray { array.as_byte_view() } + |array| -> arrow_array::BinaryViewArray { array.as_byte_view() } ); define_row_builder!( struct FixedSizeBinaryArrowToVariantBuilder<'a>, - |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() } + |array| -> arrow_array::FixedSizeBinaryArray { array.as_fixed_size_binary() } ); define_row_builder!( @@ -495,7 +496,7 @@ define_row_builder!( define_row_builder!( struct StringViewArrowToVariantBuilder<'a>, - |array| -> arrow::array::StringViewArray { array.as_string_view() } + |array| -> arrow_array::StringViewArray { array.as_string_view() } ); /// Null builder that always appends null @@ -603,13 +604,13 @@ impl ListLikeArray for FixedSizeListArray { /// Struct builder for StructArray pub(crate) struct StructArrowToVariantBuilder<'a> { - struct_array: &'a arrow::array::StructArray, + struct_array: &'a arrow_array::StructArray, field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>, } impl<'a> StructArrowToVariantBuilder<'a> { pub(crate) fn new( - struct_array: &'a arrow::array::StructArray, + struct_array: &'a arrow_array::StructArray, options: &'a CastOptions, ) -> Result { let mut field_builders = Vec::new(); @@ -659,8 +660,8 @@ impl<'a> StructArrowToVariantBuilder<'a> { /// Map builder for MapArray types pub(crate) struct MapArrowToVariantBuilder<'a> { - map_array: &'a arrow::array::MapArray, - key_strings: arrow::array::StringArray, + map_array: &'a arrow_array::MapArray, + key_strings: arrow_array::StringArray, values_builder: Box>, } @@ -719,7 +720,7 @@ impl<'a> MapArrowToVariantBuilder<'a> { /// /// NOTE: Union type ids are _not_ required to be dense, hence the hash map for child builders. pub(crate) struct UnionArrowToVariantBuilder<'a> { - union_array: &'a arrow::array::UnionArray, + union_array: &'a arrow_array::UnionArray, child_builders: HashMap>>, } @@ -808,7 +809,7 @@ impl<'a> DictionaryArrowToVariantBuilder<'a> { /// Run-end encoded array builder with efficient sequential access pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> { - run_array: &'a arrow::array::RunArray, + run_array: &'a arrow_array::RunArray, values_builder: Box>, run_ends: &'a [R::Native], @@ -893,7 +894,7 @@ impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> { mod tests { use super::*; use crate::{VariantArray, VariantArrayBuilder}; - use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray}; + use arrow_array::{ArrayRef, BooleanArray, Int32Array, StringArray}; use arrow::datatypes::Int32Type; use std::sync::Arc; @@ -980,7 +981,7 @@ mod tests { #[test] fn test_struct_row_builder() { - use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray}; + use arrow_array::{ArrayRef, Int32Array, StringArray, StructArray}; use arrow_schema::{DataType, Field}; use std::sync::Arc; @@ -1027,7 +1028,7 @@ mod tests { #[test] fn test_run_end_encoded_row_builder() { - use arrow::array::{Int32Array, RunArray}; + use arrow_array::{Int32Array, RunArray}; use arrow::datatypes::Int32Type; // Create a run-end encoded array: [A, A, B, B, B, C] @@ -1050,7 +1051,7 @@ mod tests { #[test] fn test_run_end_encoded_random_access() { - use arrow::array::{Int32Array, RunArray}; + use arrow_array::{Int32Array, RunArray}; use arrow::datatypes::Int32Type; // Create a run-end encoded array: [A, A, B, B, B, C] @@ -1076,7 +1077,7 @@ mod tests { #[test] fn test_run_end_encoded_with_nulls() { - use arrow::array::{Int32Array, RunArray}; + use arrow_array::{Int32Array, RunArray}; use arrow::datatypes::Int32Type; // Create a run-end encoded array with null values: [A, A, null, null, B] @@ -1107,7 +1108,7 @@ mod tests { #[test] fn test_dictionary_row_builder() { - use arrow::array::{DictionaryArray, Int32Array}; + use arrow_array::{DictionaryArray, Int32Array}; use arrow::datatypes::Int32Type; // Create a dictionary array: keys=[0, 1, 0, 2, 1], values=["apple", "banana", "cherry"] @@ -1127,7 +1128,7 @@ mod tests { #[test] fn test_dictionary_with_nulls() { - use arrow::array::{DictionaryArray, Int32Array}; + use arrow_array::{DictionaryArray, Int32Array}; use arrow::datatypes::Int32Type; // Create a dictionary array with null keys: keys=[0, null, 1, null, 2], values=["x", "y", "z"] @@ -1159,7 +1160,7 @@ mod tests { #[test] fn test_dictionary_random_access() { - use arrow::array::{DictionaryArray, Int32Array}; + use arrow_array::{DictionaryArray, Int32Array}; use arrow::datatypes::Int32Type; // Create a dictionary array: keys=[0, 1, 2, 0, 1, 2], values=["red", "green", "blue"] @@ -1186,7 +1187,7 @@ mod tests { #[test] fn test_nested_dictionary() { - use arrow::array::{DictionaryArray, Int32Array, StructArray}; + use arrow_array::{DictionaryArray, Int32Array, StructArray}; use arrow::datatypes::{Field, Int32Type}; // Create a dictionary with struct values @@ -1250,7 +1251,7 @@ mod tests { #[test] fn test_list_row_builder() { - use arrow::array::ListArray; + use arrow_array::ListArray; // Create a list array: [[1, 2], [3, 4, 5], null, []] let data = vec![ @@ -1289,7 +1290,7 @@ mod tests { #[test] fn test_sliced_list_row_builder() { - use arrow::array::ListArray; + use arrow_array::ListArray; // Create a list array: [[1, 2], [3, 4, 5], [6]] let data = vec![ @@ -1328,7 +1329,7 @@ mod tests { #[test] fn test_nested_list_row_builder() { - use arrow::array::ListArray; + use arrow_array::ListArray; use arrow::datatypes::Field; // Build the nested structure manually @@ -1385,7 +1386,7 @@ mod tests { #[test] fn test_map_row_builder() { - use arrow::array::{Int32Array, MapArray, StringArray, StructArray}; + use arrow_array::{Int32Array, MapArray, StringArray, StructArray}; use arrow::buffer::{NullBuffer, OffsetBuffer}; use arrow::datatypes::{DataType, Field, Fields}; use std::sync::Arc; @@ -1456,7 +1457,7 @@ mod tests { #[test] fn test_union_sparse_row_builder() { - use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray}; + use arrow_array::{Float64Array, Int32Array, StringArray, UnionArray}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::{DataType, Field, UnionFields}; use std::sync::Arc; @@ -1501,7 +1502,7 @@ mod tests { #[test] fn test_union_dense_row_builder() { - use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray}; + use arrow_array::{Float64Array, Int32Array, StringArray, UnionArray}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::{DataType, Field, UnionFields}; use std::sync::Arc; @@ -1561,7 +1562,7 @@ mod tests { #[test] fn test_union_sparse_type_ids_row_builder() { - use arrow::array::{Int32Array, StringArray, UnionArray}; + use arrow_array::{Int32Array, StringArray, UnionArray}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::{DataType, Field, UnionFields}; use std::sync::Arc; @@ -1613,7 +1614,7 @@ mod tests { #[test] fn test_decimal32_row_builder() { - use arrow::array::Decimal32Array; + use arrow_array::Decimal32Array; use parquet_variant::VariantDecimal4; // Test Decimal32Array with scale 2 (e.g., for currency: 12.34) @@ -1633,7 +1634,7 @@ mod tests { #[test] fn test_decimal128_row_builder() { - use arrow::array::Decimal128Array; + use arrow_array::Decimal128Array; use parquet_variant::VariantDecimal16; // Test Decimal128Array with negative scale (multiply by 10^|scale|) @@ -1653,7 +1654,7 @@ mod tests { #[test] fn test_decimal256_overflow_row_builder() { - use arrow::array::Decimal256Array; + use arrow_array::Decimal256Array; use arrow::datatypes::i256; // Test Decimal256Array with a value that overflows i128 @@ -1674,7 +1675,7 @@ mod tests { #[test] fn test_binary_row_builder() { - use arrow::array::BinaryArray; + use arrow_array::BinaryArray; let binary_data = vec![ Some(b"hello".as_slice()), @@ -1697,7 +1698,7 @@ mod tests { #[test] fn test_binary_view_row_builder() { - use arrow::array::BinaryViewArray; + use arrow_array::BinaryViewArray; let binary_data = vec![ Some(b"short".as_slice()), @@ -1720,7 +1721,7 @@ mod tests { #[test] fn test_fixed_size_binary_row_builder() { - use arrow::array::FixedSizeBinaryArray; + use arrow_array::FixedSizeBinaryArray; let binary_data = vec![ Some([0x01, 0x02, 0x03, 0x04]), @@ -1743,7 +1744,7 @@ mod tests { #[test] fn test_utf8_view_row_builder() { - use arrow::array::StringViewArray; + use arrow_array::StringViewArray; let string_data = vec![ Some("short"), @@ -1766,7 +1767,7 @@ mod tests { #[test] fn test_timestamp_second_row_builder() { - use arrow::array::TimestampSecondArray; + use arrow_array::TimestampSecondArray; let timestamp_data = vec![ Some(1609459200), // 2021-01-01 00:00:00 UTC @@ -1790,7 +1791,7 @@ mod tests { #[test] fn test_timestamp_with_timezone_row_builder() { - use arrow::array::TimestampMicrosecondArray; + use arrow_array::TimestampMicrosecondArray; use chrono::DateTime; let timestamp_data = vec![ @@ -1817,7 +1818,7 @@ mod tests { #[test] fn test_timestamp_nanosecond_precision_row_builder() { - use arrow::array::TimestampNanosecondArray; + use arrow_array::TimestampNanosecondArray; let timestamp_data = vec![ Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC @@ -1843,7 +1844,7 @@ mod tests { #[test] fn test_timestamp_millisecond_row_builder() { - use arrow::array::TimestampMillisecondArray; + use arrow_array::TimestampMillisecondArray; let timestamp_data = vec![ Some(1609459200123), // 2021-01-01 00:00:00.123 UTC @@ -1869,7 +1870,7 @@ mod tests { #[test] fn test_date32_row_builder() { - use arrow::array::Date32Array; + use arrow_array::Date32Array; use chrono::NaiveDate; let date_data = vec![ @@ -1897,7 +1898,7 @@ mod tests { #[test] fn test_date64_row_builder() { - use arrow::array::Date64Array; + use arrow_array::Date64Array; use chrono::NaiveDate; // Test Date64Array with various dates (milliseconds since epoch) @@ -1926,7 +1927,7 @@ mod tests { #[test] fn test_time32_second_row_builder() { - use arrow::array::Time32SecondArray; + use arrow_array::Time32SecondArray; use chrono::NaiveTime; // Test Time32SecondArray with various times (seconds since midnight) @@ -1955,7 +1956,7 @@ mod tests { #[test] fn test_time32_millisecond_row_builder() { - use arrow::array::Time32MillisecondArray; + use arrow_array::Time32MillisecondArray; use chrono::NaiveTime; // Test Time32MillisecondArray with various times (milliseconds since midnight) @@ -1984,7 +1985,7 @@ mod tests { #[test] fn test_time64_microsecond_row_builder() { - use arrow::array::Time64MicrosecondArray; + use arrow_array::Time64MicrosecondArray; use chrono::NaiveTime; // Test Time64MicrosecondArray with various times (microseconds since midnight) @@ -2013,7 +2014,7 @@ mod tests { #[test] fn test_time64_nanosecond_row_builder() { - use arrow::array::Time64NanosecondArray; + use arrow_array::Time64NanosecondArray; use chrono::NaiveTime; // Test Time64NanosecondArray with various times (nanoseconds since midnight) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 4f400a5f7bd5..36db434f2488 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -17,7 +17,7 @@ use crate::arrow_to_variant::make_arrow_to_variant_row_builder; use crate::{CastOptions, VariantArray, VariantArrayBuilder}; -use arrow::array::Array; +use arrow_array::Array; use arrow_schema::ArrowError; /// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs index 0983147132a2..33190edfe69b 100644 --- a/parquet-variant-compute/src/from_json.rs +++ b/parquet-variant-compute/src/from_json.rs @@ -19,7 +19,7 @@ //! STRUCT use crate::{VariantArray, VariantArrayBuilder}; -use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray}; +use arrow_array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray}; use arrow_schema::ArrowError; use parquet_variant_json::JsonToVariant; diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 7b8dc28d5f1a..58967e723638 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -22,11 +22,11 @@ use crate::variant_to_arrow::{ PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder, }; use crate::{VariantArray, VariantValueArrayBuilder}; -use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder}; -use arrow::buffer::NullBuffer; -use arrow::compute::CastOptions; -use arrow::datatypes::{DataType, Field, FieldRef, Fields, TimeUnit}; -use arrow::error::{ArrowError, Result}; +use arrow_array::{ArrayRef, BinaryViewArray}; +use arrow_buffer::{NullBuffer, NullBufferBuilder}; +use arrow_cast::CastOptions; +use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit}; +use std::result::Result; use parquet_variant::{Variant, VariantBuilderExt, VariantPath, VariantPathElement}; use indexmap::IndexMap; @@ -67,7 +67,7 @@ use std::sync::Arc; /// /// See [`ShreddedSchemaBuilder`] for a convenient way to build the `as_type` /// value passed to this function. -pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result { +pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result { if array.typed_value_field().is_some() { return Err(ArrowError::InvalidArgumentError( "Input is already shredded".to_string(), @@ -107,7 +107,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( cast_options: &'a CastOptions, capacity: usize, top_level: bool, -) -> Result> { +) -> Result, ArrowError> { let builder = match data_type { DataType::Struct(fields) => { let typed_value_builder = VariantToShreddedObjectVariantRowBuilder::try_new( @@ -169,7 +169,7 @@ pub(crate) enum VariantToShreddedVariantRowBuilder<'a> { Object(VariantToShreddedObjectVariantRowBuilder<'a>), } impl<'a> VariantToShreddedVariantRowBuilder<'a> { - pub fn append_null(&mut self) -> Result<()> { + pub fn append_null(&mut self) -> Result<(), ArrowError> { use VariantToShreddedVariantRowBuilder::*; match self { Primitive(b) => b.append_null(), @@ -177,7 +177,7 @@ impl<'a> VariantToShreddedVariantRowBuilder<'a> { } } - pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result { + pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result { use VariantToShreddedVariantRowBuilder::*; match self { Primitive(b) => b.append_value(value), @@ -185,7 +185,7 @@ impl<'a> VariantToShreddedVariantRowBuilder<'a> { } } - pub fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option)> { + pub fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option), ArrowError> { use VariantToShreddedVariantRowBuilder::*; match self { Primitive(b) => b.finish(), @@ -215,14 +215,14 @@ impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> { top_level, } } - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { // Only the top-level struct that represents the variant can be nullable; object fields and // array elements are non-nullable. self.nulls.append(!self.top_level); self.value_builder.append_null(); self.typed_value_builder.append_null() } - fn append_value(&mut self, value: Variant<'_, '_>) -> Result { + fn append_value(&mut self, value: Variant<'_, '_>) -> Result { self.nulls.append_non_null(); if self.typed_value_builder.append_value(&value)? { self.value_builder.append_null(); @@ -231,7 +231,7 @@ impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> { } Ok(true) } - fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option)> { + fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option), ArrowError> { Ok(( self.value_builder.build()?, self.typed_value_builder.finish()?, @@ -254,7 +254,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { cast_options: &'a CastOptions, capacity: usize, top_level: bool, - ) -> Result { + ) -> Result { let typed_value_builders = fields.iter().map(|field| { let builder = make_variant_to_shredded_variant_arrow_row_builder( field.data_type(), @@ -266,14 +266,14 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { }); Ok(Self { value_builder: VariantValueArrayBuilder::new(capacity), - typed_value_builders: typed_value_builders.collect::>()?, + typed_value_builders: typed_value_builders.collect::>()?, typed_value_nulls: NullBufferBuilder::new(capacity), nulls: NullBufferBuilder::new(capacity), top_level, }) } - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { // Only the top-level struct that represents the variant can be nullable; object fields and // array elements are non-nullable. self.nulls.append(!self.top_level); @@ -284,7 +284,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { } Ok(()) } - fn append_value(&mut self, value: Variant<'_, '_>) -> Result { + fn append_value(&mut self, value: Variant<'_, '_>) -> Result { let Variant::Object(ref obj) = value else { // Not an object => fall back self.nulls.append_non_null(); @@ -333,7 +333,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { self.nulls.append_non_null(); Ok(true) } - fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option)> { + fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option), ArrowError> { let mut builder = StructArrayBuilder::new(); for (field_name, typed_value_builder) in self.typed_value_builders { let (value, typed_value, nulls) = typed_value_builder.finish()?; @@ -578,7 +578,7 @@ impl VariantSchemaNode { mod tests { use super::*; use crate::VariantArrayBuilder; - use arrow::array::{Array, FixedSizeBinaryArray, Float64Array, Int64Array}; + use arrow_array::{Array, FixedSizeBinaryArray, Float64Array, Int64Array}; use arrow::datatypes::{DataType, Field, Fields, TimeUnit, UnionFields, UnionMode}; use parquet_variant::{ ObjectBuilder, ReadOnlyMetadataBuilder, Variant, VariantBuilder, VariantPath, @@ -772,7 +772,7 @@ mod tests { .typed_value_field() .unwrap() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); assert_eq!(typed_value_int32.value(0), 42); assert!(typed_value_int32.is_null(1)); // float doesn't convert to int32 @@ -916,7 +916,7 @@ mod tests { .typed_value_field() .unwrap() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); // Extract score and age fields from typed_value struct @@ -1309,7 +1309,7 @@ mod tests { .typed_value_field() .unwrap() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); // Extract id and session_id fields from typed_value struct @@ -1616,7 +1616,7 @@ mod tests { .typed_value_field() .unwrap() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let time_field = @@ -1636,7 +1636,7 @@ mod tests { .typed_value_field() .unwrap() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); // Row 0 diff --git a/parquet-variant-compute/src/to_json.rs b/parquet-variant-compute/src/to_json.rs index efe50c2a0987..d3a009cfe051 100644 --- a/parquet-variant-compute/src/to_json.rs +++ b/parquet-variant-compute/src/to_json.rs @@ -18,10 +18,9 @@ //! Module for transforming a batch of Variants represented as //! STRUCT into a batch of JSON strings. -use arrow::array::{Array, ArrayRef, BinaryArray, BooleanBufferBuilder, StringArray, StructArray}; -use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer}; -use arrow::datatypes::DataType; -use arrow_schema::ArrowError; +use arrow_array::{Array, ArrayRef, BinaryArray, StringArray, StructArray}; +use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer, BooleanBufferBuilder}; +use arrow_schema::{ArrowError, DataType}; use parquet_variant::Variant; use parquet_variant_json::VariantToJson; diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 01065175653f..d20a45e32907 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,8 +17,8 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::compute::{DecimalCast, rescale_decimal}; -use arrow::datatypes::{ +use arrow_cast::{DecimalCast, rescale_decimal}; +use arrow_array::types::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, }; @@ -73,24 +73,24 @@ macro_rules! impl_timestamp_from_variant { }; } -impl_primitive_from_variant!(datatypes::Int32Type, as_int32); -impl_primitive_from_variant!(datatypes::Int16Type, as_int16); -impl_primitive_from_variant!(datatypes::Int8Type, as_int8); -impl_primitive_from_variant!(datatypes::Int64Type, as_int64); -impl_primitive_from_variant!(datatypes::UInt8Type, as_u8); -impl_primitive_from_variant!(datatypes::UInt16Type, as_u16); -impl_primitive_from_variant!(datatypes::UInt32Type, as_u32); -impl_primitive_from_variant!(datatypes::UInt64Type, as_u64); -impl_primitive_from_variant!(datatypes::Float16Type, as_f16); -impl_primitive_from_variant!(datatypes::Float32Type, as_f32); -impl_primitive_from_variant!(datatypes::Float64Type, as_f64); -impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| { - Some(datatypes::Date32Type::from_naive_date(v)) +impl_primitive_from_variant!(types::Int32Type, as_int32); +impl_primitive_from_variant!(types::Int16Type, as_int16); +impl_primitive_from_variant!(types::Int8Type, as_int8); +impl_primitive_from_variant!(types::Int64Type, as_int64); +impl_primitive_from_variant!(types::UInt8Type, as_u8); +impl_primitive_from_variant!(types::UInt16Type, as_u16); +impl_primitive_from_variant!(types::UInt32Type, as_u32); +impl_primitive_from_variant!(types::UInt64Type, as_u64); +impl_primitive_from_variant!(types::Float16Type, as_f16); +impl_primitive_from_variant!(types::Float32Type, as_f32); +impl_primitive_from_variant!(types::Float64Type, as_f64); +impl_primitive_from_variant!(types::Date32Type, as_naive_date, |v| { + Some(types::Date32Type::from_naive_date(v)) }); -impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| { - Some(datatypes::Date64Type::from_naive_date(v)) +impl_primitive_from_variant!(types::Date64Type, as_naive_date, |v| { + Some(types::Date64Type::from_naive_date(v)) }); -impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| { +impl_primitive_from_variant!(types::Time32SecondType, as_time_utc, |v| { // Return None if there are leftover nanoseconds if v.nanosecond() != 0 { None @@ -98,7 +98,7 @@ impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| { Some(v.num_seconds_from_midnight() as i32) } }); -impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| { +impl_primitive_from_variant!(types::Time32MillisecondType, as_time_utc, |v| { // Return None if there are leftover microseconds if v.nanosecond() % 1_000_000 != 0 { None @@ -106,15 +106,15 @@ impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32) } }); -impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| { +impl_primitive_from_variant!(types::Time64MicrosecondType, as_time_utc, |v| { Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64) }); -impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| { +impl_primitive_from_variant!(types::Time64NanosecondType, as_time_utc, |v| { // convert micro to nano seconds Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64) }); impl_timestamp_from_variant!( - datatypes::TimestampSecondType, + types::TimestampSecondType, as_timestamp_ntz_nanos, ntz = true, |timestamp| { @@ -127,7 +127,7 @@ impl_timestamp_from_variant!( } ); impl_timestamp_from_variant!( - datatypes::TimestampSecondType, + types::TimestampSecondType, as_timestamp_nanos, ntz = false, |timestamp| { @@ -140,7 +140,7 @@ impl_timestamp_from_variant!( } ); impl_timestamp_from_variant!( - datatypes::TimestampMillisecondType, + types::TimestampMillisecondType, as_timestamp_ntz_nanos, ntz = true, |timestamp| { @@ -153,7 +153,7 @@ impl_timestamp_from_variant!( } ); impl_timestamp_from_variant!( - datatypes::TimestampMillisecondType, + types::TimestampMillisecondType, as_timestamp_nanos, ntz = false, |timestamp| { @@ -166,25 +166,25 @@ impl_timestamp_from_variant!( } ); impl_timestamp_from_variant!( - datatypes::TimestampMicrosecondType, + types::TimestampMicrosecondType, as_timestamp_ntz_micros, ntz = true, Self::make_value, ); impl_timestamp_from_variant!( - datatypes::TimestampMicrosecondType, + types::TimestampMicrosecondType, as_timestamp_micros, ntz = false, |timestamp| Self::make_value(timestamp.naive_utc()) ); impl_timestamp_from_variant!( - datatypes::TimestampNanosecondType, + types::TimestampNanosecondType, as_timestamp_ntz_nanos, ntz = true, Self::make_value ); impl_timestamp_from_variant!( - datatypes::TimestampNanosecondType, + types::TimestampNanosecondType, as_timestamp_nanos, ntz = false, |timestamp| Self::make_value(timestamp.naive_utc()) @@ -299,7 +299,7 @@ macro_rules! generic_conversion_single_value_with_result { Err(e) => Err(ArrowError::CastError(format!( "Cast failed at index {idx} (array type: {ty}): {e}", idx = $index, - ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE + ty = <$t as ::arrow_array::types::ArrowPrimitiveType>::DATA_TYPE ))), } }}; diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs index c20bb697903c..81b74b6fa8f0 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -19,18 +19,20 @@ use crate::arrow_to_variant::ListLikeArray; use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder}; -use arrow::array::{ - Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, +use arrow_array::{ + Array, cast::AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, GenericListArray, GenericListViewArray, PrimitiveArray, StringArray, StructArray, }; -use arrow::buffer::NullBuffer; -use arrow::datatypes::{ - ArrowPrimitiveType, DataType, Date32Type, Decimal32Type, Decimal64Type, Decimal128Type, +use arrow_buffer::NullBuffer; +use arrow_array::types::{ + ArrowPrimitiveType, Date32Type, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, - Time64MicrosecondType, TimeUnit, TimestampMicrosecondType, TimestampNanosecondType, + Time64MicrosecondType, TimestampMicrosecondType, TimestampNanosecondType, }; -use arrow::error::{ArrowError, Result}; -use arrow::temporal_conversions::time64us_to_time; +use arrow_schema::{DataType, TimeUnit}; +use arrow_schema::ArrowError; +use std::result::Result; +use arrow_array::temporal_conversions::time64us_to_time; use chrono::{DateTime, Utc}; use indexmap::IndexMap; use parquet_variant::{ @@ -55,7 +57,7 @@ use uuid::Uuid; /// # Errors /// - If the shredded data contains spec violations (e.g., field name conflicts) /// - If unsupported data types are encountered in typed_value columns -pub fn unshred_variant(array: &VariantArray) -> Result { +pub fn unshred_variant(array: &VariantArray) -> Result { // Check if already unshredded (optimization for common case) if array.typed_value_field().is_none() && array.value_field().is_some() { return Ok(array.clone()); @@ -129,7 +131,7 @@ impl<'a> UnshredVariantRowBuilder<'a> { builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { match self { Self::PrimitiveInt8(b) => b.append_row(builder, metadata, index), Self::PrimitiveInt16(b) => b.append_row(builder, metadata, index), @@ -161,7 +163,7 @@ impl<'a> UnshredVariantRowBuilder<'a> { /// Creates a new UnshredVariantRowBuilder from shredding state /// Returns None for None/None case - caller decides how to handle based on context - fn try_new_opt(shredding_state: BorrowedShreddingState<'a>) -> Result> { + fn try_new_opt(shredding_state: BorrowedShreddingState<'a>) -> Result, ArrowError> { let value = shredding_state.value_field(); let typed_value = shredding_state.typed_value_field(); let Some(typed_value) = typed_value else { @@ -284,7 +286,7 @@ impl<'a> NullUnshredVariantBuilder<'a> { builder: &mut impl VariantBuilderExt, _metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { if self.nulls.is_some_and(|nulls| nulls.is_null(index)) { builder.append_null(); } else { @@ -296,7 +298,7 @@ impl<'a> NullUnshredVariantBuilder<'a> { /// Builder for arrays that only have value column (already unshredded) struct ValueOnlyUnshredVariantBuilder<'a> { - value: &'a arrow::array::BinaryViewArray, + value: &'a arrow_array::BinaryViewArray, } impl<'a> ValueOnlyUnshredVariantBuilder<'a> { @@ -309,7 +311,7 @@ impl<'a> ValueOnlyUnshredVariantBuilder<'a> { builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { if self.value.is_null(index) { builder.append_null(); } else { @@ -327,7 +329,7 @@ trait AppendToVariantBuilder: Array { &self, builder: &mut impl VariantBuilderExt, index: usize, - ) -> Result<()>; + ) -> Result<(), ArrowError>; } /// Macro that handles the unshredded case (typed_value is missing or NULL) and returns early if @@ -373,8 +375,8 @@ impl<'a, T: AppendToVariantBuilder> UnshredPrimitiveRowBuilder<'a, T> { &mut self, builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, - index: usize, - ) -> Result<()> { + index: usize, + ) -> Result<(), ArrowError> { handle_unshredded_case!(self, builder, metadata, index, false); // If we get here, typed_value is valid and value is NULL @@ -390,7 +392,7 @@ macro_rules! impl_append_to_variant_builder { &self, builder: &mut impl VariantBuilderExt, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { let value = self.value(index); $( let $v = value; @@ -436,11 +438,11 @@ impl_append_to_variant_builder!(FixedSizeBinaryArray, |bytes| { /// Trait for timestamp types to handle conversion to `DateTime` trait TimestampType: ArrowPrimitiveType { - fn to_datetime_utc(value: i64) -> Result>; + fn to_datetime_utc(value: i64) -> Result, ArrowError>; } impl TimestampType for TimestampMicrosecondType { - fn to_datetime_utc(micros: i64) -> Result> { + fn to_datetime_utc(micros: i64) -> Result, ArrowError> { DateTime::from_timestamp_micros(micros).ok_or_else(|| { ArrowError::InvalidArgumentError(format!( "Invalid timestamp microsecond value: {micros}" @@ -450,7 +452,7 @@ impl TimestampType for TimestampMicrosecondType { } impl TimestampType for TimestampNanosecondType { - fn to_datetime_utc(nanos: i64) -> Result> { + fn to_datetime_utc(nanos: i64) -> Result, ArrowError> { Ok(DateTime::from_timestamp_nanos(nanos)) } } @@ -480,7 +482,7 @@ impl<'a, T: TimestampType> TimestampUnshredRowBuilder<'a, T> { builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { handle_unshredded_case!(self, builder, metadata, index, false); // If we get here, typed_value is valid and value is NULL @@ -524,7 +526,7 @@ where builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { handle_unshredded_case!(self, builder, metadata, index, false); let raw = self.typed_value.value(index); @@ -536,13 +538,13 @@ where /// Builder for unshredding struct/object types with nested fields struct StructUnshredVariantBuilder<'a> { - value: Option<&'a arrow::array::BinaryViewArray>, - typed_value: &'a arrow::array::StructArray, + value: Option<&'a arrow_array::BinaryViewArray>, + typed_value: &'a arrow_array::StructArray, field_unshredders: IndexMap<&'a str, Option>>, } impl<'a> StructUnshredVariantBuilder<'a> { - fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a StructArray) -> Result { + fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a StructArray) -> Result { // Create unshredders for each field in constructor let mut field_unshredders = IndexMap::new(); for (field, field_array) in typed_value.fields().iter().zip(typed_value.columns()) { @@ -569,7 +571,7 @@ impl<'a> StructUnshredVariantBuilder<'a> { builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { let value = handle_unshredded_case!(self, builder, metadata, index, true); // If we get here, typed_value is valid and value may or may not be valid @@ -614,7 +616,7 @@ struct ListUnshredVariantBuilder<'a, L: ListLikeArray> { } impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { - fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result { + fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result { // Create a recursive unshredder for the list elements // The element type comes from the values array of the list let element_values = typed_value.values(); @@ -647,7 +649,7 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { builder: &mut impl VariantBuilderExt, metadata: &VariantMetadata, index: usize, - ) -> Result<()> { + ) -> Result<(), ArrowError> { handle_unshredded_case!(self, builder, metadata, index, false); // If we get here, typed_value is valid and value is NULL -- process the list elements diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index fb2a08d64193..508d1f68610e 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -22,15 +22,15 @@ use crate::type_conversion::{ generic_conversion_single_value, generic_conversion_single_value_with_result, primitive_conversion_single_value, }; -use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray}; -use arrow::buffer::NullBuffer; -use arrow::compute::cast; -use arrow::datatypes::{ +use arrow_array::{Array, ArrayRef, cast::AsArray, BinaryViewArray, StructArray}; +use arrow_buffer::NullBuffer; +use arrow_cast::cast; +use arrow_array::types::{ Date32Type, Decimal32Type, Decimal64Type, Decimal128Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, Time64MicrosecondType, TimestampMicrosecondType, TimestampNanosecondType, }; -use arrow::error::Result; +use std::result::Result; use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit}; use chrono::{DateTime, NaiveTime}; @@ -62,11 +62,11 @@ impl ExtensionType for VariantType { Some(String::new()) } - fn deserialize_metadata(_metadata: Option<&str>) -> Result { + fn deserialize_metadata(_metadata: Option<&str>) -> Result { Ok("") } - fn supports_data_type(&self, data_type: &DataType) -> Result<()> { + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { if matches!(data_type, DataType::Struct(_)) { Ok(()) } else { @@ -76,7 +76,7 @@ impl ExtensionType for VariantType { } } - fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result { + fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result { Self.supports_data_type(data_type)?; Ok(Self) } @@ -253,7 +253,7 @@ impl VariantArray { /// int8. /// /// Currently, only [`BinaryViewArray`] are supported. - pub fn try_new(inner: &dyn Array) -> Result { + pub fn try_new(inner: &dyn Array) -> Result { // Workaround lack of support for Binary // https://github.com/apache/arrow-rs/issues/8387 let inner = cast_to_binary_view_arrays(inner)?; @@ -367,7 +367,7 @@ impl VariantArray { /// /// Note: Does not do deep validation of the [`Variant`], so it is up to the /// caller to ensure that the metadata and value were constructed correctly. - pub fn try_value(&self, index: usize) -> Result> { + pub fn try_value(&self, index: usize) -> Result, ArrowError> { match (self.typed_value_field(), self.value_field()) { // Always prefer typed_value, if available (Some(typed_value), value) if typed_value.is_valid(index) => { @@ -627,7 +627,7 @@ impl ShreddedVariantFieldArray { /// or be a list, large_list, list_view or struct /// /// Currently, only `value` columns of type [`BinaryViewArray`] are supported. - pub fn try_new(inner: &dyn Array) -> Result { + pub fn try_new(inner: &dyn Array) -> Result { let Some(inner_struct) = inner.as_struct_opt() else { return Err(ArrowError::InvalidArgumentError( "Invalid ShreddedVariantFieldArray: requires StructArray as input".to_string(), @@ -859,7 +859,7 @@ impl<'a> BorrowedShreddingState<'a> { impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> { type Error = ArrowError; - fn try_from(inner_struct: &'a StructArray) -> Result { + fn try_from(inner_struct: &'a StructArray) -> Result { // The `value` column need not exist, but if it does it must be a binary view. let value = if let Some(value_col) = inner_struct.column_by_name("value") { let Some(binary_view) = value_col.as_binary_view_opt() else { @@ -880,7 +880,7 @@ impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> { impl TryFrom<&StructArray> for ShreddingState { type Error = ArrowError; - fn try_from(inner_struct: &StructArray) -> Result { + fn try_from(inner_struct: &StructArray) -> Result { Ok(BorrowedShreddingState::try_from(inner_struct)?.into()) } } @@ -938,7 +938,7 @@ fn typed_value_to_variant<'a>( typed_value: &'a ArrayRef, value: Option<&BinaryViewArray>, index: usize, -) -> Result> { +) -> Result, ArrowError> { let data_type = typed_value.data_type(); if value.is_some_and(|v| !matches!(data_type, DataType::Struct(_)) && v.is_valid(index)) { // Only a partially shredded struct is allowed to have values for both columns @@ -1109,7 +1109,7 @@ fn typed_value_to_variant<'a>( /// * `StructArray` /// /// So cast them to get the right type. -fn cast_to_binary_view_arrays(array: &dyn Array) -> Result { +fn cast_to_binary_view_arrays(array: &dyn Array) -> Result { let new_type = canonicalize_and_verify_data_type(array.data_type())?; if let Cow::Borrowed(_) = new_type { if let Some(array) = array.as_struct_opt() { @@ -1122,7 +1122,7 @@ fn cast_to_binary_view_arrays(array: &dyn Array) -> Result { /// Recursively visits a data type, ensuring that it only contains data types that can legally /// appear in a (possibly shredded) variant array. It also replaces Binary fields with BinaryView, /// since that's what comes back from the parquet reader and what the variant code expects to find. -fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result> { +fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result, ArrowError> { use DataType::*; // helper macros @@ -1220,7 +1220,7 @@ fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result) -> Result>> { +fn canonicalize_and_verify_field(field: &Arc) -> Result>, ArrowError> { let Cow::Owned(new_data_type) = canonicalize_and_verify_data_type(field.data_type())? else { return Ok(Cow::Borrowed(field)); }; diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs index 86ece0010042..03f28bf87be3 100644 --- a/parquet-variant-compute/src/variant_array_builder.rs +++ b/parquet-variant-compute/src/variant_array_builder.rs @@ -18,7 +18,7 @@ //! [`VariantArrayBuilder`] implementation use crate::VariantArray; -use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray}; +use arrow_array::{ArrayRef, BinaryViewArray, builder::BinaryViewBuilder, builder::NullBufferBuilder, StructArray}; use arrow_schema::{ArrowError, DataType, Field, Fields}; use parquet_variant::{ BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt, diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 624c8ae128dc..9acd9339a561 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -14,12 +14,11 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use arrow::{ - array::{self, Array, ArrayRef, BinaryViewArray, StructArray}, - compute::CastOptions, - datatypes::Field, - error::Result, -}; +use arrow_array::{self, Array, ArrayRef, BinaryViewArray, StructArray}; +use arrow_cast::cast::CastOptions; +use arrow_schema::Field; +use std::result::Result; + use arrow_schema::{ArrowError, DataType, FieldRef}; use parquet_variant::{VariantPath, VariantPathElement}; @@ -27,7 +26,7 @@ use crate::VariantArray; use crate::variant_array::BorrowedShreddingState; use crate::variant_to_arrow::make_variant_to_arrow_row_builder; -use arrow::array::AsArray; +use arrow_array::cast::AsArray; use std::sync::Arc; pub(crate) enum ShreddedPathStep<'a> { @@ -51,7 +50,7 @@ pub(crate) fn follow_shredded_path_element<'a>( shredding_state: &BorrowedShreddingState<'a>, path_element: &VariantPathElement<'_>, cast_options: &CastOptions, -) -> Result> { +) -> Result, ArrowError> { // If the requested path element is not present in `typed_value`, and `value` is missing, then // we know it does not exist; it, and all paths under it, are all-NULL. let missing_path_step = || match shredding_state.value_field() { @@ -117,13 +116,13 @@ fn shredded_get_path( path: &[VariantPathElement<'_>], as_field: Option<&Field>, cast_options: &CastOptions, -) -> Result { +) -> Result { // Helper that creates a new VariantArray from the given nested value and typed_value columns, // properly accounting for accumulated nulls from path traversal let make_target_variant = |value: Option, typed_value: Option, - accumulated_nulls: Option| { + accumulated_nulls: Option| { let metadata = input.metadata_field().clone(); VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls) }; @@ -168,7 +167,7 @@ fn shredded_get_path( ShreddedPathStep::Success(state) => { // Union nulls from the typed_value we just accessed if let Some(typed_value) = shredding_state.typed_value_field() { - accumulated_nulls = arrow::buffer::NullBuffer::union( + accumulated_nulls = arrow_buffer::NullBuffer::union( accumulated_nulls.as_ref(), typed_value.nulls(), ); @@ -180,8 +179,8 @@ fn shredded_get_path( ShreddedPathStep::Missing => { let num_rows = input.len(); let arr = match as_field.map(|f| f.data_type()) { - Some(data_type) => Arc::new(array::new_null_array(data_type, num_rows)) as _, - None => Arc::new(array::NullArray::new(num_rows)) as _, + Some(data_type) => Arc::new(arrow_array::new_null_array(data_type, num_rows)) as _, + None => Arc::new(arrow_array::NullArray::new(num_rows)) as _, }; return Ok(arr); } @@ -226,7 +225,7 @@ fn shredded_get_path( cast_options, ) }) - .collect::>>()?; + .collect::, ArrowError>>()?; let struct_nulls = target.nulls().cloned(); @@ -274,7 +273,7 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti /// variant? Caller can pass None as the requested type to fetch a specific path, but it would /// quickly become annoying (and inefficient) to call `variant_get` for each leaf value in a struct or /// list and then try to assemble the results. -pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result { +pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result { let variant_array = VariantArray::try_new(input)?; let GetOptions { diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 7d4c427fa46a..6ca760f580da 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ - ArrayRef, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray, BinaryViewBuilder, - BooleanBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder, LargeStringBuilder, NullArray, - NullBufferBuilder, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder, +use arrow_array::{ + ArrayRef, builder::BinaryBuilder, builder::BinaryLikeArrayBuilder, builder::BinaryViewBuilder, builder::BooleanBuilder, builder::FixedSizeBinaryBuilder, builder::LargeBinaryBuilder, + builder::LargeStringBuilder, builder::NullBufferBuilder, builder::PrimitiveBuilder, builder::StringBuilder, builder::StringLikeArrayBuilder, builder::StringViewBuilder, NullArray, BinaryViewArray, }; -use arrow::compute::{CastOptions, DecimalCast}; -use arrow::datatypes::{self, DataType, DecimalType}; -use arrow::error::{ArrowError, Result}; +use arrow_cast::{CastOptions, DecimalCast}; +use arrow_array::types::{self, DecimalType}; +use arrow_schema::{ArrowError, DataType}; +use std::result::Result; use parquet_variant::{Variant, VariantPath}; use crate::type_conversion::{ @@ -39,39 +39,39 @@ use std::sync::Arc; pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> { Null(VariantToNullArrowRowBuilder<'a>), Boolean(VariantToBooleanArrowRowBuilder<'a>), - Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>), - Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>), - Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>), - Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>), - UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>), - UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>), - UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>), - UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>), - Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>), - Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>), - Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>), - Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>), - Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>), - Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>), - Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>), - TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>), - TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>), - TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>), + Int8(VariantToPrimitiveArrowRowBuilder<'a, types::Int8Type>), + Int16(VariantToPrimitiveArrowRowBuilder<'a, types::Int16Type>), + Int32(VariantToPrimitiveArrowRowBuilder<'a, types::Int32Type>), + Int64(VariantToPrimitiveArrowRowBuilder<'a, types::Int64Type>), + UInt8(VariantToPrimitiveArrowRowBuilder<'a, types::UInt8Type>), + UInt16(VariantToPrimitiveArrowRowBuilder<'a, types::UInt16Type>), + UInt32(VariantToPrimitiveArrowRowBuilder<'a, types::UInt32Type>), + UInt64(VariantToPrimitiveArrowRowBuilder<'a, types::UInt64Type>), + Float16(VariantToPrimitiveArrowRowBuilder<'a, types::Float16Type>), + Float32(VariantToPrimitiveArrowRowBuilder<'a, types::Float32Type>), + Float64(VariantToPrimitiveArrowRowBuilder<'a, types::Float64Type>), + Decimal32(VariantToDecimalArrowRowBuilder<'a, types::Decimal32Type>), + Decimal64(VariantToDecimalArrowRowBuilder<'a, types::Decimal64Type>), + Decimal128(VariantToDecimalArrowRowBuilder<'a, types::Decimal128Type>), + Decimal256(VariantToDecimalArrowRowBuilder<'a, types::Decimal256Type>), + TimestampSecond(VariantToTimestampArrowRowBuilder<'a, types::TimestampSecondType>), + TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, types::TimestampSecondType>), + TimestampMilli(VariantToTimestampArrowRowBuilder<'a, types::TimestampMillisecondType>), TimestampMilliNtz( - VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>, + VariantToTimestampNtzArrowRowBuilder<'a, types::TimestampMillisecondType>, ), - TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>), + TimestampMicro(VariantToTimestampArrowRowBuilder<'a, types::TimestampMicrosecondType>), TimestampMicroNtz( - VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>, + VariantToTimestampNtzArrowRowBuilder<'a, types::TimestampMicrosecondType>, ), - TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>), - TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>), - Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>), - Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>), - Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>), - Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>), - Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>), - Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>), + TimestampNano(VariantToTimestampArrowRowBuilder<'a, types::TimestampNanosecondType>), + TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, types::TimestampNanosecondType>), + Time32Second(VariantToPrimitiveArrowRowBuilder<'a, types::Time32SecondType>), + Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, types::Time32MillisecondType>), + Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, types::Time64MicrosecondType>), + Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, types::Time64NanosecondType>), + Date32(VariantToPrimitiveArrowRowBuilder<'a, types::Date32Type>), + Date64(VariantToPrimitiveArrowRowBuilder<'a, types::Date64Type>), Uuid(VariantToUuidArrowRowBuilder<'a>), String(VariantToStringArrowBuilder<'a, StringBuilder>), LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>), @@ -94,7 +94,7 @@ pub(crate) enum VariantToArrowRowBuilder<'a> { } impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { - pub fn append_null(&mut self) -> Result<()> { + pub fn append_null(&mut self) -> Result<(), ArrowError> { use PrimitiveVariantToArrowRowBuilder::*; match self { Null(b) => b.append_null(), @@ -138,7 +138,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { } } - pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { use PrimitiveVariantToArrowRowBuilder::*; match self { Null(b) => b.append_value(value), @@ -182,7 +182,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { } } - pub fn finish(self) -> Result { + pub fn finish(self) -> Result { use PrimitiveVariantToArrowRowBuilder::*; match self { Null(b) => b.finish(), @@ -228,7 +228,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { } impl<'a> VariantToArrowRowBuilder<'a> { - pub fn append_null(&mut self) -> Result<()> { + pub fn append_null(&mut self) -> Result<(), ArrowError> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.append_null(), @@ -237,7 +237,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { } } - pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result { + pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.append_value(&value), @@ -246,7 +246,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { } } - pub fn finish(self) -> Result { + pub fn finish(self) -> Result { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.finish(), @@ -261,7 +261,7 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>( data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, -) -> Result> { +) -> Result, ArrowError> { use PrimitiveVariantToArrowRowBuilder::*; let builder = @@ -433,7 +433,7 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>( data_type: Option<&'a DataType>, cast_options: &'a CastOptions, capacity: usize, -) -> Result> { +) -> Result, ArrowError> { use VariantToArrowRowBuilder::*; let mut builder = match data_type { @@ -484,11 +484,11 @@ pub(crate) struct VariantPathRowBuilder<'a> { } impl<'a> VariantPathRowBuilder<'a> { - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { self.builder.append_null() } - fn append_value(&mut self, value: Variant<'_, '_>) -> Result { + fn append_value(&mut self, value: Variant<'_, '_>) -> Result { if let Some(v) = value.get_path(&self.path) { self.builder.append_value(v) } else { @@ -497,7 +497,7 @@ impl<'a> VariantPathRowBuilder<'a> { } } - fn finish(self) -> Result { + fn finish(self) -> Result { self.builder.finish() } } @@ -526,12 +526,12 @@ macro_rules! define_variant_to_primitive_builder { } } - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { self.builder.append_null(); Ok(()) } - fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result { + fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result { if let Some(v) = $value_transform { self.builder.append_value(v); Ok(true) @@ -553,7 +553,7 @@ macro_rules! define_variant_to_primitive_builder { // Add this to silence unused mut warning from macro-generated code // This is mainly for `FakeNullBuilder` #[allow(unused_mut)] - fn finish(mut self) -> Result { + fn finish(mut self) -> Result { Ok(Arc::new(self.builder.finish())) } } @@ -571,7 +571,7 @@ define_variant_to_primitive_builder!( struct VariantToBooleanArrowRowBuilder<'a> |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) }, |value| value.as_boolean(), - type_name: datatypes::BooleanType::DATA_TYPE + type_name: types::BooleanType::DATA_TYPE ); define_variant_to_primitive_builder!( @@ -626,7 +626,7 @@ where capacity: usize, precision: u8, scale: i8, - ) -> Result { + ) -> Result { let builder = PrimitiveBuilder::::with_capacity(capacity) .with_precision_and_scale(precision, scale)?; Ok(Self { @@ -637,12 +637,12 @@ where }) } - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { self.builder.append_null(); Ok(()) } - fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { if let Some(scaled) = variant_to_unscaled_decimal::(value, self.precision, self.scale) { self.builder.append_value(scaled); Ok(true) @@ -660,7 +660,7 @@ where } } - fn finish(mut self) -> Result { + fn finish(mut self) -> Result { Ok(Arc::new(self.builder.finish())) } } @@ -679,12 +679,12 @@ impl<'a> VariantToUuidArrowRowBuilder<'a> { } } - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { self.builder.append_null(); Ok(()) } - fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { match value.as_uuid() { Some(uuid) => { self.builder @@ -703,7 +703,7 @@ impl<'a> VariantToUuidArrowRowBuilder<'a> { } } - fn finish(mut self) -> Result { + fn finish(mut self) -> Result { Ok(Arc::new(self.builder.finish())) } } @@ -726,19 +726,19 @@ impl VariantToBinaryVariantArrowRowBuilder { } impl VariantToBinaryVariantArrowRowBuilder { - fn append_null(&mut self) -> Result<()> { + fn append_null(&mut self) -> Result<(), ArrowError> { self.builder.append_null(); self.nulls.append_null(); Ok(()) } - fn append_value(&mut self, value: Variant<'_, '_>) -> Result { + fn append_value(&mut self, value: Variant<'_, '_>) -> Result { self.builder.append_value(value); self.nulls.append_non_null(); Ok(true) } - fn finish(mut self) -> Result { + fn finish(mut self) -> Result { let variant_array = VariantArray::from_parts( self.metadata, Some(self.builder.build()?), @@ -779,9 +779,8 @@ define_variant_to_primitive_builder!( #[cfg(test)] mod tests { use super::make_primitive_variant_to_arrow_row_builder; - use arrow::compute::CastOptions; - use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode}; - use arrow::error::ArrowError; + use arrow_cast::CastOptions; + use arrow_schema::{ArrowError, DataType, Field, Fields, UnionFields, UnionMode}; use std::sync::Arc; #[test] diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 50f69fea5441..85a7f6f96176 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -131,7 +131,7 @@ encryption = ["dep:ring"] flate2-rust_backened = ["flate2/rust_backend"] flate2-zlib-rs = ["flate2/zlib-rs"] # Enable parquet variant support -variant_experimental = ["arrow", "parquet-variant", "parquet-variant-json", "parquet-variant-compute"] +variant_experimental = ["arrow", "arrow/variant_experimental", "parquet-variant", "parquet-variant-json", "parquet-variant-compute"] # Enable geospatial support geospatial = ["parquet-geospatial"]