diff --git a/Cargo.lock b/Cargo.lock index af1c980d45da..54c4f9564e21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2401,6 +2401,7 @@ dependencies = [ "criterion", "half", "indexmap", + "num-traits", "parquet-variant", "parquet-variant-json", "rand 0.9.4", diff --git a/parquet-variant-compute/Cargo.toml b/parquet-variant-compute/Cargo.toml index bcfb36b8710c..4cf2a3b1804d 100644 --- a/parquet-variant-compute/Cargo.toml +++ b/parquet-variant-compute/Cargo.toml @@ -37,6 +37,7 @@ parquet-variant-json = { workspace = true } chrono = { workspace = true } uuid = { version = "1.18.0", features = ["v4"] } serde_json = "1.0" +num-traits = { version = "0.2", default-features = false } # uuid requires the `js` feature to run on wasm [target.'cfg(target_arch = "wasm32")'.dependencies] diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 440f4b716521..a0941b2ec0bf 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -92,6 +92,7 @@ pub(crate) fn shred_variant_with_options( cast_options, array.len(), NullValue::TopLevelVariant, + true, )?; for i in 0..array.len() { if array.is_null(i) { @@ -145,6 +146,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( cast_options: &'a CastOptions, capacity: usize, null_value: NullValue, + shred: bool, ) -> Result> { let builder = match data_type { DataType::Struct(fields) => { @@ -153,6 +155,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( cast_options, capacity, null_value, + shred, )?; VariantToShreddedVariantRowBuilder::Object(typed_value_builder) } @@ -193,7 +196,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( | DataType::FixedSizeBinary(16) // UUID => { let builder = - make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; + make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity, shred)?; let 
typed_value_builder = VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, null_value); VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder) @@ -369,6 +372,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { cast_options: &'a CastOptions, capacity: usize, null_value: NullValue, + shred: bool, ) -> Result { let typed_value_builders = fields.iter().map(|field| { let builder = make_variant_to_shredded_variant_arrow_row_builder( @@ -376,6 +380,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { cast_options, capacity, NullValue::ObjectField, + shred, )?; Ok((field.name().as_str(), builder)) }); @@ -710,9 +715,12 @@ mod tests { use arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit, UnionFields, UnionMode, }; + use arrow_schema::IntervalUnit; + use chrono::{DateTime, NaiveDate, NaiveTime}; use parquet_variant::{ BuilderSpecificState, EMPTY_VARIANT_METADATA_BYTES, ObjectBuilder, ReadOnlyMetadataBuilder, - Variant, VariantBuilder, VariantPath, VariantPathElement, + ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, VariantDecimal16, + VariantPath, VariantPathElement, }; use std::sync::Arc; use uuid::Uuid; @@ -1046,6 +1054,7 @@ mod tests { &cast_options, 1, mode, + true, ) .unwrap(); primitive_builder.append_null().unwrap(); @@ -1076,6 +1085,7 @@ mod tests { &cast_options, 1, mode, + true, ) .unwrap(); array_builder.append_null().unwrap(); @@ -1104,6 +1114,7 @@ mod tests { &cast_options, 1, mode, + true, ) .unwrap(); object_builder.append_null().unwrap(); @@ -1310,7 +1321,7 @@ mod tests { .downcast_ref::() .unwrap(); assert_eq!(typed_value_int32.value(0), 42); - assert_eq!(typed_value_int32.value(1), 3); + assert!(typed_value_int32.is_null(1)); // float doesn't shred to int32 assert!(typed_value_int32.is_null(2)); // string doesn't convert to int32 // Test Float64 target @@ -1321,7 +1332,7 @@ mod tests { .as_any() .downcast_ref::() .unwrap(); - 
assert_eq!(typed_value_float64.value(0), 42.0); // int converts to float + assert!(typed_value_float64.is_null(0)); // int doesn't shred to float assert_eq!(typed_value_float64.value(1), 3.15); assert!(typed_value_float64.is_null(2)); // string doesn't convert } @@ -2807,4 +2818,207 @@ mod tests { let shredding_type = ShreddedSchemaBuilder::default().build(); assert_eq!(shredding_type, DataType::Null); } + + // This test wants to cover that the variant can/can't be shredded to the given data type. + #[test] + fn test_variant_type_shredded_correctly() { + // array contains all variant types + let mut array_builder = VariantArrayBuilder::new(30); + array_builder.append_value(Variant::Null); + array_builder.append_value(Variant::Int8(1)); + array_builder.append_value(Variant::Int16(2)); + array_builder.append_value(Variant::Int32(3)); + array_builder.append_value(Variant::Int64(4)); + array_builder.append_value(Variant::Date(NaiveDate::from_epoch_days(12345).unwrap())); + array_builder.append_value(Variant::TimestampMicros( + DateTime::from_timestamp_micros(123456789).unwrap(), + )); + array_builder.append_value(Variant::TimestampNtzMicros( + DateTime::from_timestamp_micros(123456789) + .unwrap() + .naive_utc(), + )); + array_builder.append_value(Variant::TimestampNanos(DateTime::from_timestamp_nanos( + 1234567890123, + ))); + array_builder.append_value(Variant::TimestampNtzNanos( + DateTime::from_timestamp_nanos(1234567890123).naive_utc(), + )); + array_builder.append_value(VariantDecimal4::try_new(123, 2).unwrap()); + array_builder.append_value(VariantDecimal8::try_new(123, 3).unwrap()); + array_builder.append_value(VariantDecimal16::try_new(123, 4).unwrap()); + array_builder.append_value(Variant::Float(5.2)); + array_builder.append_value(Variant::Double(6.4)); + array_builder.append_value(Variant::BooleanTrue); + array_builder.append_value(Variant::BooleanFalse); + array_builder.append_value(Variant::Binary("helow".as_bytes())); + 
array_builder.append_value(Variant::String("hello")); + array_builder.append_value(Variant::ShortString( + ShortString::try_from("world").unwrap(), + )); + array_builder.append_value(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(12345, 123).unwrap(), + )); + + let array = array_builder.build(); + + fn can_shred_to(v: &Variant, dt: &DataType) -> bool { + matches!( + (v, dt), + (Variant::Int8(_), DataType::Int8) + | (Variant::Int8(_), DataType::Int16) + | (Variant::Int8(_), DataType::Int32) + | (Variant::Int8(_), DataType::Int64) + | (Variant::Int16(_), DataType::Int8) + | (Variant::Int16(_), DataType::Int16) + | (Variant::Int16(_), DataType::Int32) + | (Variant::Int16(_), DataType::Int64) + | (Variant::Int32(_), DataType::Int8) + | (Variant::Int32(_), DataType::Int16) + | (Variant::Int32(_), DataType::Int32) + | (Variant::Int32(_), DataType::Int64) + | (Variant::Int64(_), DataType::Int8) + | (Variant::Int64(_), DataType::Int16) + | (Variant::Int64(_), DataType::Int32) + | (Variant::Int64(_), DataType::Int64) + | (Variant::Date(_), DataType::Date32) + | ( + Variant::TimestampMicros(_), + DataType::Timestamp(TimeUnit::Microsecond, Some(_)), + ) + | ( + Variant::TimestampMicros(_), + DataType::Timestamp(TimeUnit::Nanosecond, Some(_)) + ) + | ( + Variant::TimestampNtzMicros(_), + DataType::Timestamp(TimeUnit::Microsecond, None), + ) + | ( + Variant::TimestampNtzMicros(_), + DataType::Timestamp(TimeUnit::Nanosecond, None) + ) + | ( + Variant::TimestampNanos(_), + DataType::Timestamp(TimeUnit::Nanosecond, Some(_)), + ) + | ( + Variant::TimestampNtzNanos(_), + DataType::Timestamp(TimeUnit::Nanosecond, None), + ) + | (Variant::Decimal4(_), DataType::Decimal32(_, _)) + | (Variant::Decimal4(_), DataType::Decimal64(_, _)) + | (Variant::Decimal4(_), DataType::Decimal128(_, _)) + | (Variant::Decimal8(_), DataType::Decimal32(_, _)) + | (Variant::Decimal8(_), DataType::Decimal64(_, _)) + | (Variant::Decimal8(_), DataType::Decimal128(_, _)) + | 
(Variant::Decimal16(_), DataType::Decimal32(_, _)) + | (Variant::Decimal16(_), DataType::Decimal64(_, _)) + | (Variant::Decimal16(_), DataType::Decimal128(_, _)) + | (Variant::Float(_), DataType::Float32) + | (Variant::Float(_), DataType::Float64) + | (Variant::Double(_), DataType::Float32) + | (Variant::Double(_), DataType::Float64) + | (Variant::BooleanFalse, DataType::Boolean) + | (Variant::BooleanTrue, DataType::Boolean) + | (Variant::Binary(_), DataType::Binary) + | (Variant::Binary(_), DataType::BinaryView) + | (Variant::Binary(_), DataType::LargeBinary) + | (Variant::ShortString(_), DataType::Utf8) + | (Variant::ShortString(_), DataType::Utf8View) + | (Variant::ShortString(_), DataType::LargeUtf8) + | (Variant::String(_), DataType::Utf8) + | (Variant::String(_), DataType::Utf8View) + | (Variant::String(_), DataType::LargeUtf8) + | (Variant::Time(_), DataType::Time64(_)) + ) + } + + macro_rules! assert_shred_type { + ($shred_type:expr, $expected_value_valid_bits:expr) => { + let shredded_array_result = shred_variant(&array, &$shred_type); + match shredded_array_result { + Ok(shredded_array) => { + let value_column = shredded_array.inner().column_by_name("value").unwrap(); + for (idx, valid) in $expected_value_valid_bits.iter().enumerate() { + match valid { + true => assert!( + value_column.is_null(idx), + "{:?} should be shredded to {}", + array.value(idx), + $shred_type + ), + false => assert!( + value_column.is_valid(idx), + "{:?} should not be shredded to {}", + array.value(idx), + $shred_type + ), + } + } + } + Err(e) => { + let error_msg = format!("is not a valid variant shredding type"); + assert!( + e.to_string().contains(error_msg.as_str()), + "{} => {}", + $shred_type, + e.to_string() + ); + } + } + }; + } + + let types = [ + DataType::Null, + DataType::Boolean, + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + DataType::Float32, + 
DataType::Float64, + DataType::Timestamp(TimeUnit::Second, Some("+00:00".into())), + DataType::Timestamp(TimeUnit::Second, None), + DataType::Timestamp(TimeUnit::Millisecond, Some("-00:00".into())), + DataType::Timestamp(TimeUnit::Millisecond, None), + DataType::Timestamp(TimeUnit::Microsecond, Some("-00:00".into())), + DataType::Timestamp(TimeUnit::Microsecond, None), + DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())), + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Date32, + DataType::Date64, + DataType::Time32(TimeUnit::Second), + DataType::Time32(TimeUnit::Millisecond), + DataType::Time64(TimeUnit::Microsecond), + DataType::Time64(TimeUnit::Nanosecond), + DataType::Duration(TimeUnit::Nanosecond), + DataType::Interval(IntervalUnit::DayTime), + DataType::Binary, + DataType::FixedSizeBinary(16), // uuid + DataType::FixedSizeBinary(32), + DataType::LargeBinary, + DataType::BinaryView, + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Decimal32(4, 2), + DataType::Decimal64(10, 4), + DataType::Decimal128(20, 10), + DataType::Decimal256(30, 10), + ]; + + for data_type in types { + let expected_bits = array + .iter() + .map(|v| can_shred_to(&v.unwrap(), &data_type)) + .collect::>(); + assert_shred_type!(data_type, expected_bits); + } + } } diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 2255d4316b25..228a5e64dbc1 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,28 +17,32 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. 
+use arrow::array::ArrowNativeTypeOp; use arrow::compute::{ - CastOptions, DecimalCast, parse_string_to_decimal_native, rescale_decimal, - single_float_to_decimal, + CastOptions, DecimalCast, cast_num_to_bool, cast_single_string_to_boolean_default, num_cast, + parse_string_to_decimal_native, rescale_decimal, single_bool_to_numeric, + single_decimal_to_float_lossy, single_float_to_decimal, }; use arrow::datatypes::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, }; use arrow::error::{ArrowError, Result}; -use chrono::Timelike; +use chrono::{NaiveDate, NaiveTime, Timelike}; +use half::f16; +use num_traits::NumCast; use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; /// Extension trait for Arrow primitive types that can extract their native value from a Variant pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType { - fn from_variant(variant: &Variant<'_, '_>) -> Option; + fn from_variant(variant: &Variant<'_, '_>, shred: bool) -> Option; } /// Extension trait for Arrow timestamp types that can extract their native value from a Variant /// We can't use [`PrimitiveFromVariant`] directly because we need _two_ implementations for each /// timestamp type -- the `NTZ` param here. pub(crate) trait TimestampFromVariant: ArrowTimestampType { - fn from_variant(variant: &Variant<'_, '_>) -> Option; + fn from_variant(variant: &Variant<'_, '_>, shred: bool) -> Option; } /// Cast a single `Variant` value with safe/strict semantics. @@ -64,10 +68,13 @@ pub(crate) fn variant_cast_with_options<'a, 'm, 'v, T>( /// Macro to generate PrimitiveFromVariant implementations for Arrow primitive types macro_rules! impl_primitive_from_variant { - ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => { + ($arrow_type:ty, $shred_method:ident, $get_method:ident $(, $cast_fn:expr)?) 
=> { impl PrimitiveFromVariant for $arrow_type { - fn from_variant(variant: &Variant<'_, '_>) -> Option { - let value = variant.$variant_method(); + fn from_variant(variant: &Variant<'_, '_>, shred: bool) -> Option { + let value = match shred { + true => variant.$shred_method(), + false => $get_method(variant), + }; $( let value = value.and_then($cast_fn); )? value } @@ -78,53 +85,166 @@ macro_rules! impl_primitive_from_variant { macro_rules! impl_timestamp_from_variant { ($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr $(,)?) => { impl TimestampFromVariant<{ $ntz }> for $timestamp_type { - fn from_variant(variant: &Variant<'_, '_>) -> Option { + #[allow(unused)] + fn from_variant(variant: &Variant<'_, '_>, shred: bool) -> Option { variant.$variant_method().and_then($cast_fn) } } }; } -impl_primitive_from_variant!(datatypes::Int32Type, as_int32); -impl_primitive_from_variant!(datatypes::Int16Type, as_int16); -impl_primitive_from_variant!(datatypes::Int8Type, as_int8); -impl_primitive_from_variant!(datatypes::Int64Type, as_int64); -impl_primitive_from_variant!(datatypes::UInt8Type, as_u8); -impl_primitive_from_variant!(datatypes::UInt16Type, as_u16); -impl_primitive_from_variant!(datatypes::UInt32Type, as_u32); -impl_primitive_from_variant!(datatypes::UInt64Type, as_u64); -impl_primitive_from_variant!(datatypes::Float16Type, as_f16); -impl_primitive_from_variant!(datatypes::Float32Type, as_f32); -impl_primitive_from_variant!(datatypes::Float64Type, as_f64); -impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| { +enum NumericKind { + Integer, + Float, +} + +trait DecimalCastTarget: NumCast + Default { + const KIND: NumericKind; +} + +macro_rules! 
impl_decimal_cast_target { + ($raw_type: ident, $target_kind:expr) => { + impl DecimalCastTarget for $raw_type { + const KIND: NumericKind = $target_kind; + } + }; +} + +impl_decimal_cast_target!(i8, NumericKind::Integer); +impl_decimal_cast_target!(i16, NumericKind::Integer); +impl_decimal_cast_target!(i32, NumericKind::Integer); +impl_decimal_cast_target!(i64, NumericKind::Integer); +impl_decimal_cast_target!(u8, NumericKind::Integer); +impl_decimal_cast_target!(u16, NumericKind::Integer); +impl_decimal_cast_target!(u32, NumericKind::Integer); +impl_decimal_cast_target!(u64, NumericKind::Integer); +impl_decimal_cast_target!(f16, NumericKind::Float); +impl_decimal_cast_target!(f32, NumericKind::Float); +impl_decimal_cast_target!(f64, NumericKind::Float); + +/// Converts a boolean or numeric variant(integers, floating-point, and decimals) +/// to the specified numeric type `T`. +/// +/// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if +/// the conversion succeeds, `None` if the variant can't be casted to type `T`. 
+fn as_num(variant: &Variant) -> Option +where + T: DecimalCastTarget, +{ + match *variant { + Variant::BooleanFalse => single_bool_to_numeric(false), + Variant::BooleanTrue => single_bool_to_numeric(true), + Variant::Int8(i) => num_cast(i), + Variant::Int16(i) => num_cast(i), + Variant::Int32(i) => num_cast(i), + Variant::Int64(i) => num_cast(i), + Variant::Float(f) => num_cast(f), + Variant::Double(d) => num_cast(d), + Variant::Decimal4(d) => { + cast_decimal_to_num::(d.integer(), d.scale(), |x| x as f64) + } + Variant::Decimal8(d) => { + cast_decimal_to_num::(d.integer(), d.scale(), |x| x as f64) + } + Variant::Decimal16(d) => { + cast_decimal_to_num::(d.integer(), d.scale(), |x| x as f64) + } + _ => None, + } +} + +fn cast_decimal_to_num(raw: D::Native, scale: u8, as_float: F) -> Option +where + D: DecimalType, + D::Native: NumCast + ArrowNativeTypeOp, + T: DecimalCastTarget, + F: Fn(D::Native) -> f64, +{ + let base: D::Native = NumCast::from(10)?; + + let div = base.pow_checked(>::from(scale)).ok()?; + match T::KIND { + NumericKind::Integer => raw + .div_checked(div) + .ok() + .and_then(::from::), + NumericKind::Float => T::from(single_decimal_to_float_lossy::( + &as_float, + raw, + >::from(scale), + )), + } +} + +fn cast_naive_date(value: &Variant<'_, '_>) -> Option { + value.as_naive_date() +} + +fn cast_time_utc(value: &Variant<'_, '_>) -> Option { + value.as_time_utc() +} + +impl_primitive_from_variant!(datatypes::Int32Type, as_int32, as_num); +impl_primitive_from_variant!(datatypes::Int16Type, as_int16, as_num); +impl_primitive_from_variant!(datatypes::Int8Type, as_int8, as_num); +impl_primitive_from_variant!(datatypes::Int64Type, as_int64, as_num); +impl_primitive_from_variant!(datatypes::UInt8Type, as_u8, as_num); +impl_primitive_from_variant!(datatypes::UInt16Type, as_u16, as_num); +impl_primitive_from_variant!(datatypes::UInt32Type, as_u32, as_num); +impl_primitive_from_variant!(datatypes::UInt64Type, as_u64, as_num); 
+impl_primitive_from_variant!(datatypes::Float16Type, as_f16, as_num); +impl_primitive_from_variant!(datatypes::Float32Type, as_f32, as_num); +impl_primitive_from_variant!(datatypes::Float64Type, as_f64, as_num); +impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, cast_naive_date, |v| { Some(datatypes::Date32Type::from_naive_date(v)) }); -impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| { +impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, cast_naive_date, |v| { Some(datatypes::Date64Type::from_naive_date(v)) }); -impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| { - // Return None if there are leftover nanoseconds - if v.nanosecond() != 0 { - None - } else { - Some(v.num_seconds_from_midnight() as i32) +impl_primitive_from_variant!( + datatypes::Time32SecondType, + as_time_utc, + cast_time_utc, + |v| { + // Return None if there are leftover nanoseconds + if v.nanosecond() != 0 { + None + } else { + Some(v.num_seconds_from_midnight() as i32) + } } -}); -impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| { - // Return None if there are leftover microseconds - if v.nanosecond() % 1_000_000 != 0 { - None - } else { - Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32) +); +impl_primitive_from_variant!( + datatypes::Time32MillisecondType, + as_time_utc, + cast_time_utc, + |v| { + // Return None if there are leftover microseconds + if v.nanosecond() % 1_000_000 != 0 { + None + } else { + Some( + (v.num_seconds_from_midnight() * 1_000) as i32 + + (v.nanosecond() / 1_000_000) as i32, + ) + } } -}); -impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| { - Some(v.num_seconds_from_midnight() as i64 * 1_000_000 + v.nanosecond() as i64 / 1_000) -}); -impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| { - // convert micro to nano seconds - Some(v.num_seconds_from_midnight() as 
i64 * 1_000_000_000 + v.nanosecond() as i64) -}); +); +impl_primitive_from_variant!( + datatypes::Time64MicrosecondType, + as_time_utc, + cast_time_utc, + |v| { Some(v.num_seconds_from_midnight() as i64 * 1_000_000 + v.nanosecond() as i64 / 1_000) } +); +impl_primitive_from_variant!( + datatypes::Time64NanosecondType, + as_time_utc, + cast_time_utc, + |v| { + // convert micro to nano seconds + Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64) + } +); impl_timestamp_from_variant!( datatypes::TimestampSecondType, as_timestamp_ntz_nanos, @@ -218,6 +338,7 @@ pub(crate) fn variant_to_unscaled_decimal( variant: &Variant<'_, '_>, precision: u8, scale: i8, + shred: bool, ) -> Option where O: DecimalType, @@ -225,58 +346,62 @@ where { let mul = 10_f64.powi(scale as i32); - match variant { - Variant::Int8(i) => rescale_decimal::( + match (variant, shred) { + (Variant::Int8(i), false) => rescale_decimal::( *i as i32, VariantDecimal4::MAX_PRECISION, 0, precision, scale, ), - Variant::Int16(i) => rescale_decimal::( + (Variant::Int16(i), false) => rescale_decimal::( *i as i32, VariantDecimal4::MAX_PRECISION, 0, precision, scale, ), - Variant::Int32(i) => rescale_decimal::( + (Variant::Int32(i), false) => rescale_decimal::( *i, VariantDecimal4::MAX_PRECISION, 0, precision, scale, ), - Variant::Int64(i) => rescale_decimal::( + (Variant::Int64(i), false) => rescale_decimal::( *i, VariantDecimal8::MAX_PRECISION, 0, precision, scale, ), - Variant::Float(f) => single_float_to_decimal::(f64::from(*f), mul), - Variant::Double(f) => single_float_to_decimal::(*f, mul), + (Variant::Float(f), false) => { + single_float_to_decimal::(>::from(*f), mul) + } + (Variant::Double(f), false) => single_float_to_decimal::(*f, mul), // arrow-cast only support cast string to decimal with scale >=0 for now // Please see `cast_string_to_decimal` in arrow-cast/src/cast/decimal.rs for more detail - Variant::String(v) if scale >= 0 => parse_string_to_decimal_native::(v, scale 
as _).ok(), - Variant::ShortString(v) if scale >= 0 => { + (Variant::String(v), false) if scale >= 0 => { + parse_string_to_decimal_native::(v, scale as _).ok() + } + (Variant::ShortString(v), false) if scale >= 0 => { parse_string_to_decimal_native::(v, scale as _).ok() } - Variant::Decimal4(d) => rescale_decimal::( + (Variant::Decimal4(d), _) => rescale_decimal::( d.integer(), VariantDecimal4::MAX_PRECISION, d.scale() as i8, precision, scale, ), - Variant::Decimal8(d) => rescale_decimal::( + (Variant::Decimal8(d), _) => rescale_decimal::( d.integer(), VariantDecimal8::MAX_PRECISION, d.scale() as i8, precision, scale, ), - Variant::Decimal16(d) => rescale_decimal::( + (Variant::Decimal16(d), _) => rescale_decimal::( d.integer(), VariantDecimal16::MAX_PRECISION, d.scale() as i8, @@ -287,6 +412,26 @@ where } } +pub(crate) fn variant_to_boolean(variant: &Variant<'_, '_>, shred: bool) -> Option { + if shred { + return variant.as_boolean(); + } + + match variant { + Variant::BooleanTrue => Some(true), + Variant::BooleanFalse => Some(false), + Variant::Int8(i) => Some(cast_num_to_bool(*i)), + Variant::Int16(i) => Some(cast_num_to_bool(*i)), + Variant::Int32(i) => Some(cast_num_to_bool(*i)), + Variant::Int64(i) => Some(cast_num_to_bool(*i)), + Variant::Float(f) => Some(cast_num_to_bool(*f)), + Variant::Double(d) => Some(cast_num_to_bool(*d)), + Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()), + Variant::String(s) => cast_single_string_to_boolean_default(s), + _ => None, + } +} + /// Convert the value at a specific index in the given array into a `Variant`. macro_rules! 
non_generic_conversion_single_value { ($array:expr, $cast_fn:expr, $index:expr) => {{ diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index c3e915993533..5e14e2ff2f18 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -370,6 +370,26 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti /// to the specified path. /// 2. `as_type: Some()`: an array of the specified type is returned. /// +/// # Casting Semantics +/// +/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable. +/// Conversions in this module delegate to Arrow compute cast helpers such as +/// `num_cast`, `cast_num_to_bool`, `single_bool_to_numeric`, and +/// `cast_single_string_to_boolean_default`. +/// +/// - Getting `DataType::Boolean` accepts boolean, numeric, and string variants. +/// Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows +/// Arrow UTF8-to-boolean cast rules. +/// - Getting numeric datatypes such as `DataType::Int8`, `DataType::Int16`, `DataType::Int32`, +/// `DataType::Int64`, `DataType::UInt8`, `DataType::UInt16`, `DataType::UInt32`, `DataType::UInt64`, +/// `DataType::Float16`, `DataType::Float32`, `DataType::Float64` accept +/// boolean and numeric variants (integers, floating-point, and decimals). +/// They return `None` when conversion is not possible. +/// - Getting decimals such as `DataType::Decimal32`, `DataType::Decimal64`, `DataType::Decimal128`, +/// `DataType::Decimal256` accept compatible decimal variants, integer variants, +/// float variants and string variants. +/// They return `None` when conversion is not possible. +/// /// TODO: How would a caller request a struct or list type where the fields/elements can be any /// variant? 
Caller can pass None as the requested type to fetch a specific path, but it would /// quickly become annoying (and inefficient) to call `variant_get` for each leaf value in a struct or diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 9841da555da0..edf77d349b10 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -20,7 +20,7 @@ use crate::shred_variant::{ make_variant_to_shredded_variant_arrow_row_builder, }; use crate::type_conversion::{ - PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options, + PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options, variant_to_boolean, variant_to_unscaled_decimal, }; use crate::variant_array::ShreddedVariantFieldArray; @@ -97,6 +97,7 @@ fn make_typed_variant_to_arrow_row_builder<'a>( data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, + shred: bool, ) -> Result> { use VariantToArrowRowBuilder::*; @@ -133,8 +134,12 @@ fn make_typed_variant_to_arrow_row_builder<'a>( Ok(Encoded(builder)) } data_type => { - let builder = - make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; + let builder = make_primitive_variant_to_arrow_row_builder( + data_type, + cast_options, + capacity, + shred, + )?; Ok(Primitive(builder)) } } @@ -156,7 +161,7 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>( capacity, )), Some(data_type) => { - make_typed_variant_to_arrow_row_builder(data_type, cast_options, capacity)? + make_typed_variant_to_arrow_row_builder(data_type, cast_options, capacity, false)? 
} }; @@ -370,6 +375,7 @@ impl<'a> EncodedVariantToArrowRowBuilder<'a> { value_type, cast_options, capacity, + false, )?); Ok(Self { data_type, @@ -397,169 +403,200 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>( data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, + shred: bool, ) -> Result> { use PrimitiveVariantToArrowRowBuilder::*; - let builder = - match data_type { - DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)), - DataType::Boolean => { - Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)) - } - DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Decimal32(precision, scale) => Decimal32( - VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, - ), - DataType::Decimal64(precision, scale) => Decimal64( - VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, 
*scale)?, - ), - DataType::Decimal128(precision, scale) => Decimal128( - VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, - ), - DataType::Decimal256(precision, scale) => Decimal256( - VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, - ), - DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Time32(TimeUnit::Second) => Time32Second( - VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Time32(TimeUnit::Millisecond) => Time32Milli( - VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Time32(t) => { - return Err(ArrowError::InvalidArgumentError(format!( - "The unit for Time32 must be second/millisecond, received {t:?}" - ))); - } - DataType::Time64(TimeUnit::Microsecond) => Time64Micro( - VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Time64(TimeUnit::Nanosecond) => Time64Nano( - VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Time64(t) => { - return Err(ArrowError::InvalidArgumentError(format!( - "The unit for Time64 must be micro/nano seconds, received {t:?}" - ))); - } - DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz( - VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond( - VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()), - ), - DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz( - VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli( - VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()), - ), - DataType::Timestamp(TimeUnit::Microsecond, None) => 
TimestampMicroNtz( - VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro( - VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()), - ), - DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz( - VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity), - ), - DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano( - VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()), - ), - DataType::Duration(_) | DataType::Interval(_) => { - return Err(ArrowError::InvalidArgumentError( - "Casting Variant to duration/interval types is not supported. \ + let builder = match data_type { + DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)), + DataType::Boolean => Boolean(VariantToBooleanArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Decimal32(precision, scale) => Decimal32(VariantToDecimalArrowRowBuilder::new( + cast_options, + capacity, + *precision, + *scale, + shred, + )?), + DataType::Decimal64(precision, scale) => Decimal64(VariantToDecimalArrowRowBuilder::new( + cast_options, + capacity, + *precision, + *scale, + shred, + )?), + DataType::Decimal128(precision, scale) => Decimal128(VariantToDecimalArrowRowBuilder::new( + cast_options, + capacity, + *precision, + *scale, + shred, + )?), + DataType::Decimal256(precision, scale) => Decimal256(VariantToDecimalArrowRowBuilder::new( + cast_options, + capacity, + *precision, + *scale, + shred, + )?), + DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Time32(TimeUnit::Second) => Time32Second(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + shred, + )), + DataType::Time32(TimeUnit::Millisecond) => Time32Milli( + VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Time32(t) => { + return Err(ArrowError::InvalidArgumentError(format!( + "The unit for Time32 must be second/millisecond, received {t:?}" + ))); + } + DataType::Time64(TimeUnit::Microsecond) => Time64Micro( + VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Time64(TimeUnit::Nanosecond) => Time64Nano( + VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Time64(t) => { + return Err(ArrowError::InvalidArgumentError(format!( + "The unit for Time64 must be micro/nano seconds, received {t:?}" + ))); + } + DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz( 
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond( + VariantToTimestampArrowRowBuilder::new(cast_options, capacity, shred, tz.clone()), + ), + DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz( + VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli( + VariantToTimestampArrowRowBuilder::new(cast_options, capacity, shred, tz.clone()), + ), + DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz( + VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro( + VariantToTimestampArrowRowBuilder::new(cast_options, capacity, shred, tz.clone()), + ), + DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz( + VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity, shred), + ), + DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano( + VariantToTimestampArrowRowBuilder::new(cast_options, capacity, shred, tz.clone()), + ), + DataType::Duration(_) | DataType::Interval(_) => { + return Err(ArrowError::InvalidArgumentError( + "Casting Variant to duration/interval types is not supported. \ The Variant format does not define duration/interval types." 
- .to_string(), - )); - } - DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)), - DataType::LargeBinary => { - LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)) - } - DataType::BinaryView => { - BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)) - } - DataType::FixedSizeBinary(16) => { - Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity)) - } - DataType::FixedSizeBinary(_) => { - return Err(ArrowError::NotYetImplemented(format!( - "DataType {data_type:?} not yet implemented" - ))); - } - DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)), - DataType::LargeUtf8 => { - LargeString(VariantToStringArrowBuilder::new(cast_options, capacity)) - } - DataType::Utf8View => { - StringView(VariantToStringArrowBuilder::new(cast_options, capacity)) - } - DataType::List(_) - | DataType::LargeList(_) - | DataType::ListView(_) - | DataType::LargeListView(_) - | DataType::FixedSizeList(..) - | DataType::Struct(_) - | DataType::Map(..) - | DataType::Union(..) - | DataType::Dictionary(..) - | DataType::RunEndEncoded(..) 
=> { - return Err(ArrowError::InvalidArgumentError(format!( - "Casting to {data_type:?} is not applicable for primitive Variant types" - ))); - } - }; + .to_string(), + )); + } + DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)), + DataType::LargeBinary => { + LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)) + } + DataType::BinaryView => { + BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)) + } + DataType::FixedSizeBinary(16) => { + Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity)) + } + DataType::FixedSizeBinary(_) => { + return Err(ArrowError::NotYetImplemented(format!( + "DataType {data_type:?} not yet implemented" + ))); + } + DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)), + DataType::LargeUtf8 => { + LargeString(VariantToStringArrowBuilder::new(cast_options, capacity)) + } + DataType::Utf8View => StringView(VariantToStringArrowBuilder::new(cast_options, capacity)), + DataType::List(_) + | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) + | DataType::FixedSizeList(..) + | DataType::Struct(_) + | DataType::Map(..) + | DataType::Union(..) + | DataType::Dictionary(..) + | DataType::RunEndEncoded(..) => { + return Err(ArrowError::InvalidArgumentError(format!( + "Casting to {data_type:?} is not applicable for primitive Variant types" + ))); + } + }; Ok(builder) } @@ -590,6 +627,7 @@ impl<'a> StructVariantToArrowRowBuilder<'a> { field.data_type(), cast_options, capacity, + false, )?); } Ok(Self { @@ -760,11 +798,12 @@ impl<'a> VariantPathRowBuilder<'a> { macro_rules! define_variant_to_primitive_builder { (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?> |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? 
{ $init_expr: expr }, - |$value: ident| $value_transform:expr, + |$value: ident $(, $shred: ident)?| $value_transform:expr, type_name: $type_name:expr) => { pub(crate) struct $name<$lifetime $(, $generic : $bound )?> { builder: $builder_name $(<$array_type>)?, + $($shred: bool,)? cast_options: &$lifetime CastOptions<$lifetime>, } @@ -772,12 +811,14 @@ macro_rules! define_variant_to_primitive_builder { fn new( cast_options: &$lifetime CastOptions<$lifetime>, $array_param: usize, + $($shred: bool,)? // add this so that $init_expr can use it $( $field: $field_type, )? ) -> Self { Self { builder: $init_expr, cast_options, + $($shred)? } } @@ -787,6 +828,7 @@ macro_rules! define_variant_to_primitive_builder { } fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result { + $(let $shred: bool = self.shred;)? match variant_cast_with_options( $value, self.cast_options, @@ -831,21 +873,21 @@ define_variant_to_primitive_builder!( define_variant_to_primitive_builder!( struct VariantToBooleanArrowRowBuilder<'a> |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) }, - |value| value.as_boolean(), + |value, shred| variant_to_boolean(value, shred), type_name: datatypes::BooleanType::DATA_TYPE ); define_variant_to_primitive_builder!( struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant> |capacity| -> PrimitiveBuilder { PrimitiveBuilder::::with_capacity(capacity) }, - |value| T::from_variant(value), + |value, shred| T::from_variant(value, shred), type_name: T::DATA_TYPE ); define_variant_to_primitive_builder!( struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant> |capacity| -> PrimitiveBuilder { PrimitiveBuilder::::with_capacity(capacity) }, - |value| T::from_variant(value), + |value, shred| T::from_variant(value, shred), type_name: T::DATA_TYPE ); @@ -854,7 +896,7 @@ define_variant_to_primitive_builder!( |capacity, tz: Option> | -> PrimitiveBuilder { PrimitiveBuilder::::with_capacity(capacity).with_timezone_opt(tz) }, - 
|value| T::from_variant(value), + |value, shred| T::from_variant(value, shred), type_name: T::DATA_TYPE ); @@ -875,6 +917,7 @@ where cast_options: &'a CastOptions<'a>, precision: u8, scale: i8, + shred: bool, } impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T> @@ -887,6 +930,7 @@ where capacity: usize, precision: u8, scale: i8, + shred: bool, ) -> Result { let builder = PrimitiveBuilder::::with_capacity(capacity) .with_precision_and_scale(precision, scale)?; @@ -895,6 +939,7 @@ where cast_options, precision, scale, + shred, }) } @@ -905,7 +950,7 @@ where fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { match variant_cast_with_options(value, self.cast_options, |value| { - variant_to_unscaled_decimal::(value, self.precision, self.scale) + variant_to_unscaled_decimal::(value, self.precision, self.scale, self.shred) }) { Ok(Some(scaled)) => { self.builder.append_value(scaled); @@ -1046,11 +1091,16 @@ where cast_options, capacity, NullValue::ArrayElement, + false, )?; ListElementBuilder::Shredded(Box::new(builder)) } else { - let builder = - make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?; + let builder = make_typed_variant_to_arrow_row_builder( + element_data_type, + cast_options, + capacity, + false, + )?; ListElementBuilder::Typed(Box::new(builder)) }; @@ -1151,11 +1201,16 @@ impl<'a> VariantToFixedSizeListArrowRowBuilder<'a> { cast_options, capacity, NullValue::ArrayElement, + false, )?; ListElementBuilder::Shredded(Box::new(builder)) } else { - let builder = - make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?; + let builder = make_typed_variant_to_arrow_row_builder( + element_data_type, + cast_options, + capacity, + false, + )?; ListElementBuilder::Typed(Box::new(builder)) }; Ok(Self { @@ -1336,11 +1391,15 @@ mod tests { ]; for data_type in non_primitive_types { - let err = - match make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 1) { - Ok(_) => 
panic!("non-primitive type {data_type:?} should be rejected"), - Err(err) => err, - }; + let err = match make_primitive_variant_to_arrow_row_builder( + &data_type, + &cast_options, + 1, + false, + ) { + Ok(_) => panic!("non-primitive type {data_type:?} should be rejected"), + Err(err) => err, + }; match err { ArrowError::InvalidArgumentError(msg) => { @@ -1358,7 +1417,7 @@ mod tests { ..Default::default() }; let mut builder = - make_primitive_variant_to_arrow_row_builder(&DataType::Int32, &cast_options, 2) + make_primitive_variant_to_arrow_row_builder(&DataType::Int32, &cast_options, 2, false) .unwrap(); assert!(!builder.append_value(&Variant::Null).unwrap()); @@ -1380,6 +1439,7 @@ mod tests { &DataType::Decimal32(9, 2), &cast_options, 2, + false, ) .unwrap(); let decimal_variant: Variant<'_, '_> = VariantDecimal4::try_new(1234, 2).unwrap().into(); @@ -1403,6 +1463,7 @@ mod tests { &DataType::FixedSizeBinary(16), &cast_options, 2, + false, ) .unwrap(); let uuid = Uuid::nil(); @@ -1428,7 +1489,7 @@ mod tests { let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); let mut list_builder = - make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1).unwrap(); + make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1, false).unwrap(); assert!(!list_builder.append_value(Variant::Null).unwrap()); let list_array = list_builder.finish().unwrap(); let list_array = list_array.as_any().downcast_ref::().unwrap(); @@ -1437,7 +1498,7 @@ mod tests { let struct_type = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)])); let mut struct_builder = - make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1).unwrap(); + make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1, false).unwrap(); assert!(!struct_builder.append_value(Variant::Null).unwrap()); let struct_array = struct_builder.finish().unwrap(); let struct_array = struct_array.as_any().downcast_ref::().unwrap(); diff 
--git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index c9f175c3a610..d89abdb69ac6 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -29,18 +29,10 @@ use crate::decoder::{ }; use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, slice_from_slice}; -use arrow::array::ArrowNativeTypeOp; -use arrow::compute::{ - DecimalCast, cast_num_to_bool, cast_single_string_to_boolean_default, num_cast, - parse_string_to_decimal_native, single_bool_to_numeric, single_decimal_to_float_lossy, - single_float_to_decimal, -}; -use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType}; +use std::ops::Deref; use arrow_schema::ArrowError; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; -use num_traits::NumCast; -use std::ops::Deref; mod decimal; mod list; @@ -159,25 +151,6 @@ impl Deref for ShortString<'_> { /// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md /// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md /// -/// # Casting Semantics -/// -/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable. -/// Conversions in this module delegate to Arrow compute cast helpers such as -/// [`num_cast`], [`cast_num_to_bool`], [`single_bool_to_numeric`], and -/// [`cast_single_string_to_boolean_default`]. -/// -/// - [`Self::as_boolean`] accepts boolean, numeric, and string variants. -/// Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows -/// Arrow UTF8-to-boolean cast rules. -/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`], -/// [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept -/// boolean and numeric variants (integers, floating-point, and decimals). -/// They return `None` when conversion is not possible. 
-/// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and -/// [`Self::as_decimal16`] accept compatible decimal variants, integer variants, -/// float variants and string variants. -/// They return `None` when conversion is not possible. -/// /// # Examples: /// /// ## Creating `Variant` from Rust Types @@ -305,35 +278,6 @@ const _: () = crate::utils::expect_size_of::(80); #[cfg(target_pointer_width = "32")] const _: () = crate::utils::expect_size_of::(48); -enum NumericKind { - Integer, - Float, -} - -trait DecimalCastTarget: NumCast + Default { - const KIND: NumericKind; -} - -macro_rules! impl_decimal_cast_target { - ($raw_type: ident, $target_kind:expr) => { - impl DecimalCastTarget for $raw_type { - const KIND: NumericKind = $target_kind; - } - }; -} - -impl_decimal_cast_target!(i8, NumericKind::Integer); -impl_decimal_cast_target!(i16, NumericKind::Integer); -impl_decimal_cast_target!(i32, NumericKind::Integer); -impl_decimal_cast_target!(i64, NumericKind::Integer); -impl_decimal_cast_target!(u8, NumericKind::Integer); -impl_decimal_cast_target!(u16, NumericKind::Integer); -impl_decimal_cast_target!(u32, NumericKind::Integer); -impl_decimal_cast_target!(u64, NumericKind::Integer); -impl_decimal_cast_target!(f16, NumericKind::Float); -impl_decimal_cast_target!(f32, NumericKind::Float); -impl_decimal_cast_target!(f64, NumericKind::Float); - impl<'m, 'v> Variant<'m, 'v> { /// Attempts to interpret a metadata and value buffer pair as a new `Variant`. /// @@ -536,7 +480,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to a `bool` if possible. /// - /// Returns `Some(bool)` for boolean, numeric and string variants, + /// Returns `Some(bool)` for boolean variants, /// `None` for non-boolean variants. 
/// /// # Examples @@ -552,30 +496,14 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(false); /// assert_eq!(v2.as_boolean(), Some(false)); /// - /// // and a numeric variant - /// let v3 = Variant::from(3); - /// assert_eq!(v3.as_boolean(), Some(true)); - /// - /// // and a string variant - /// let v4 = Variant::from("true"); - /// assert_eq!(v4.as_boolean(), Some(true)); - /// /// // but not from other variants - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_boolean(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_boolean(), None); /// ``` pub fn as_boolean(&self) -> Option { match self { Variant::BooleanTrue => Some(true), Variant::BooleanFalse => Some(false), - Variant::Int8(i) => Some(cast_num_to_bool(*i)), - Variant::Int16(i) => Some(cast_num_to_bool(*i)), - Variant::Int32(i) => Some(cast_num_to_bool(*i)), - Variant::Int64(i) => Some(cast_num_to_bool(*i)), - Variant::Float(f) => Some(cast_num_to_bool(*f)), - Variant::Double(d) => Some(cast_num_to_bool(*d)), - Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()), - Variant::String(s) => cast_single_string_to_boolean_default(s), _ => None, } } @@ -837,71 +765,10 @@ impl<'m, 'v> Variant<'m, 'v> { } } - fn cast_decimal_to_num(raw: D::Native, scale: u8, as_float: F) -> Option - where - D: DecimalType, - D::Native: NumCast + ArrowNativeTypeOp, - T: DecimalCastTarget, - F: Fn(D::Native) -> f64, - { - let base: D::Native = NumCast::from(10)?; - - let div = base.pow_checked(>::from(scale)).ok()?; - match T::KIND { - NumericKind::Integer => raw - .div_checked(div) - .ok() - .and_then(::from::), - NumericKind::Float => T::from(single_decimal_to_float_lossy::( - &as_float, - raw, - >::from(scale), - )), - } - } - - /// Converts a boolean or numeric variant(integers, floating-point, and decimals) - /// to the specified numeric type `T`. - /// - /// Uses Arrow's casting logic to perform the conversion. 
Returns `Some(T)` if - /// the conversion succeeds, `None` if the variant can't be casted to type `T`. - fn as_num(&self) -> Option - where - T: DecimalCastTarget, - { - match *self { - Variant::BooleanFalse => single_bool_to_numeric(false), - Variant::BooleanTrue => single_bool_to_numeric(true), - Variant::Int8(i) => num_cast(i), - Variant::Int16(i) => num_cast(i), - Variant::Int32(i) => num_cast(i), - Variant::Int64(i) => num_cast(i), - Variant::Float(f) => num_cast(f), - Variant::Double(d) => num_cast(d), - Variant::Decimal4(d) => { - Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { - x as f64 - }) - } - Variant::Decimal8(d) => { - Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { - x as f64 - }) - } - Variant::Decimal16(d) => { - Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { - x as f64 - }) - } - _ => None, - } - } - /// Converts this variant to an `i8` if possible. /// - /// Returns `Some(i8)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `i8` range, - /// `None` for other variants or values that would overflow. + /// Returns `Some(i8)` for integer variants that fit in `i8` range, + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -912,27 +779,28 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int8(), Some(123i8)); /// - /// // or from boolean variant - /// let v2 = Variant::BooleanFalse; - /// assert_eq!(v2.as_int8(), Some(0)); - /// /// // but not if it would overflow - /// let v3 = Variant::from(1234i64); - /// assert_eq!(v3.as_int8(), None); + /// let v2 = Variant::from(1234i64); + /// assert_eq!(v2.as_int8(), None); /// /// // or if the variant cannot be cast into an integer - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_int8(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_int8(), None); /// ``` pub fn as_int8(&self) -> Option { - self.as_num() + match *self { + Variant::Int8(i) => Some(i), + Variant::Int16(i) => i.try_into().ok(), + Variant::Int32(i) => i.try_into().ok(), + Variant::Int64(i) => i.try_into().ok(), + _ => None, + } } /// Converts this variant to an `i16` if possible. /// - /// Returns `Some(i16)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `i16` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(i16)` for integer variants that fit in `i16` range, + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -943,27 +811,28 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int16(), Some(123i16)); /// - /// // or from boolean variant - /// let v2 = Variant::BooleanFalse; - /// assert_eq!(v2.as_int16(), Some(0)); - /// /// // but not if it would overflow - /// let v3 = Variant::from(123456i64); - /// assert_eq!(v3.as_int16(), None); + /// let v2 = Variant::from(123456i64); + /// assert_eq!(v2.as_int16(), None); /// /// // or if the variant cannot be cast into an integer - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_int16(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_int16(), None); /// ``` pub fn as_int16(&self) -> Option { - self.as_num() + match *self { + Variant::Int8(i) => Some(i.into()), + Variant::Int16(i) => Some(i), + Variant::Int32(i) => i.try_into().ok(), + Variant::Int64(i) => i.try_into().ok(), + _ => None, + } } /// Converts this variant to an `i32` if possible. /// - /// Returns `Some(i32)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `i32` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(i32)` for integer variants that fit in `i32` range, + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -974,27 +843,28 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int32(), Some(123i32)); /// - /// // or from boolean variant - /// let v2 = Variant::BooleanFalse; - /// assert_eq!(v2.as_int32(), Some(0)); - /// /// // but not if it would overflow - /// let v3 = Variant::from(12345678901i64); - /// assert_eq!(v3.as_int32(), None); + /// let v2 = Variant::from(12345678901i64); + /// assert_eq!(v2.as_int32(), None); /// /// // or if the variant cannot be cast into an integer - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_int32(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_int32(), None); /// ``` pub fn as_int32(&self) -> Option { - self.as_num() + match *self { + Variant::Int8(i) => Some(i.into()), + Variant::Int16(i) => Some(i.into()), + Variant::Int32(i) => Some(i), + Variant::Int64(i) => i.try_into().ok(), + _ => None, + } } /// Converts this variant to an `i64` if possible. /// - /// Returns `Some(i64)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `i64` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(i64)` for integer variants, + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -1005,23 +875,37 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int64(), Some(123i64)); /// - /// // or from boolean variant - /// let v2 = Variant::BooleanFalse; - /// assert_eq!(v2.as_int64(), Some(0)); - /// /// // but not a variant that cannot be cast into an integer - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_int64(), None); + /// let v2 = Variant::from("hello!"); + /// assert_eq!(v2.as_int64(), None); /// ``` pub fn as_int64(&self) -> Option { - self.as_num() + match *self { + Variant::Int8(i) => Some(i.into()), + Variant::Int16(i) => Some(i.into()), + Variant::Int32(i) => Some(i.into()), + Variant::Int64(i) => Some(i), + _ => None, + } + } + + fn generic_convert_unsigned_primitive(&self) -> Option + where + T: TryFrom + TryFrom + TryFrom + TryFrom + TryFrom, + { + match *self { + Variant::Int8(i) => i.try_into().ok(), + Variant::Int16(i) => i.try_into().ok(), + Variant::Int32(i) => i.try_into().ok(), + Variant::Int64(i) => i.try_into().ok(), + _ => None, + } } /// Converts this variant to a `u8` if possible. /// - /// Returns `Some(u8)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `u8` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(u8)` for integer variants that fit in `u8` + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -1032,37 +916,27 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_u8(), Some(123u8)); /// - /// // or a Decimal4 with scale 0 into u8 - /// let d = VariantDecimal4::try_new(26, 0).unwrap(); - /// let v2 = Variant::from(d); - /// assert_eq!(v2.as_u8(), Some(26u8)); - /// - /// // or a variant that decimal with scale not equal to zero - /// let d = VariantDecimal4::try_new(123, 2).unwrap(); - /// let v3 = Variant::from(d); - /// assert_eq!(v3.as_u8(), Some(1)); - /// - /// // or from boolean variant - /// let v4 = Variant::BooleanFalse; - /// assert_eq!(v4.as_u8(), Some(0)); - /// /// // but not a variant that can't fit into the range - /// let v5 = Variant::from(-1); - /// assert_eq!(v5.as_u8(), None); + /// let v3 = Variant::from(-1); + /// assert_eq!(v3.as_u8(), None); + /// + /// // or not a variant decimal + /// let d = VariantDecimal4::try_new(1, 0).unwrap(); + /// let v4 = Variant::from(d); + /// assert_eq!(v4.as_u8(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v6 = Variant::from("hello!"); - /// assert_eq!(v6.as_u8(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_u8(), None); /// ``` pub fn as_u8(&self) -> Option { - self.as_num() + self.generic_convert_unsigned_primitive::() } /// Converts this variant to an `u16` if possible. /// - /// Returns `Some(u16)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `u16` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(u16)` for integer variants that fit in `u16` + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -1073,37 +947,27 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_u16(), Some(123u16)); /// - /// // or a Decimal4 with scale 0 into u8 - /// let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap(); - /// let v2 = Variant::from(d); - /// assert_eq!(v2.as_u16(), Some(u16::MAX)); + /// // but not a variant that can't fit into the range + /// let v2 = Variant::from(-1); + /// assert_eq!(v2.as_u16(), None); /// - /// // or a variant that decimal with scale not equal to zero - /// let d = VariantDecimal4::try_new(123, 2).unwrap(); + /// // or not a variant decimal + /// let d = VariantDecimal4::try_new(1, 0).unwrap(); /// let v3 = Variant::from(d); - /// assert_eq!(v3.as_u16(), Some(1)); - /// - /// // or from boolean variant - /// let v4= Variant::BooleanFalse; - /// assert_eq!(v4.as_u16(), Some(0)); - /// - /// // but not a variant that can't fit into the range - /// let v5 = Variant::from(-1); - /// assert_eq!(v5.as_u16(), None); + /// assert_eq!(v3.as_u16(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v6 = Variant::from("hello!"); - /// assert_eq!(v6.as_u16(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_u16(), None); /// ``` pub fn as_u16(&self) -> Option { - self.as_num() + self.generic_convert_unsigned_primitive::() } /// Converts this variant to an `u32` if possible. /// - /// Returns `Some(u32)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `u32` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(u32)` for integer variants that fit in `u32` + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -1114,37 +978,27 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_u32(), Some(123u32)); /// - /// // or a Decimal4 with scale 0 into u8 - /// let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap(); - /// let v2 = Variant::from(d); - /// assert_eq!(v2.as_u32(), Some(u32::MAX)); + /// // but not a variant that can't fit into the range + /// let v2 = Variant::from(-1); + /// assert_eq!(v2.as_u32(), None); /// - /// // or a variant that decimal with scale not equal to zero - /// let d = VariantDecimal8::try_new(123, 2).unwrap(); + /// // or not a variant decimal + /// let d = VariantDecimal8::try_new(1, 0).unwrap(); /// let v3 = Variant::from(d); - /// assert_eq!(v3.as_u32(), Some(1)); - /// - /// // or from boolean variant - /// let v4 = Variant::BooleanFalse; - /// assert_eq!(v4.as_u32(), Some(0)); - /// - /// // but not a variant that can't fit into the range - /// let v5 = Variant::from(-1); - /// assert_eq!(v5.as_u32(), None); + /// assert_eq!(v3.as_u32(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v6 = Variant::from("hello!"); - /// assert_eq!(v6.as_u32(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_u32(), None); /// ``` pub fn as_u32(&self) -> Option { - self.as_num() + self.generic_convert_unsigned_primitive::() } /// Converts this variant to an `u64` if possible. /// - /// Returns `Some(u64)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `u64` range - /// `None` for other variants or values that would overflow. + /// Returns `Some(u64)` for integer variants that fit in `u64` + /// `None` for non-integer variants or values that would overflow. 
/// /// # Examples /// @@ -1155,45 +1009,21 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_u64(), Some(123u64)); /// - /// // or a Decimal16 with scale 0 into u8 - /// let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap(); - /// let v2 = Variant::from(d); - /// assert_eq!(v2.as_u64(), Some(u64::MAX)); + /// // but not a variant that can't fit into the range + /// let v2 = Variant::from(-1); + /// assert_eq!(v2.as_u64(), None); /// - /// // or a variant that decimal with scale not equal to zero - /// let d = VariantDecimal16::try_new(123, 2).unwrap(); + /// // or not a variant decimal + /// let d = VariantDecimal16::try_new(1, 0).unwrap(); /// let v3 = Variant::from(d); - /// assert_eq!(v3.as_u64(), Some(1)); - /// - /// // or from boolean variant - /// let v4 = Variant::BooleanFalse; - /// assert_eq!(v4.as_u64(), Some(0)); - /// - /// // but not a variant that can't fit into the range - /// let v5 = Variant::from(-1); - /// assert_eq!(v5.as_u64(), None); + /// assert_eq!(v3.as_u64(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v6 = Variant::from("hello!"); - /// assert_eq!(v6.as_u64(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_u64(), None); /// ``` pub fn as_u64(&self) -> Option { - self.as_num() - } - - fn convert_string_to_decimal(input: &str) -> Option - where - D: DecimalType, - VD: VariantDecimalType, - D::Native: NumCast + DecimalCast, - { - // find the last '.' - let scale_usize = input.rsplit_once('.').map_or(0, |(_, frac)| frac.len()); - - let scale = u8::try_from(scale_usize).ok()?; - - let raw = parse_string_to_decimal_native::(input, scale_usize).ok()?; - VD::try_new(raw, scale).ok() + self.generic_convert_unsigned_primitive::() } /// Converts this variant to tuple with a 4-byte unscaled value if possible. 
@@ -1215,31 +1045,16 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap()); /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok()); /// - /// // or from string variants if they can be parsed as decimals - /// let v3 = Variant::from("123.45"); - /// assert_eq!(v3.as_decimal4(), VariantDecimal4::try_new(12345, 2).ok()); - /// /// // but not if the value would overflow i32 - /// let v4 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); - /// assert_eq!(v4.as_decimal4(), None); + /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); + /// assert_eq!(v3.as_decimal4(), None); /// /// // or if the variant is not a decimal - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_decimal4(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_decimal4(), None); /// ``` pub fn as_decimal4(&self) -> Option { match *self { - Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { - self.as_num::().and_then(|x| x.try_into().ok()) - } - Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) - .and_then(|x: i32| x.try_into().ok()), - Variant::Double(f) => single_float_to_decimal::(f, 1f64) - .and_then(|x: i32| x.try_into().ok()), - Variant::String(v) => Self::convert_string_to_decimal::(v), - Variant::ShortString(v) => { - Self::convert_string_to_decimal::(v.as_str()) - } Variant::Decimal4(decimal4) => Some(decimal4), Variant::Decimal8(decimal8) => decimal8.try_into().ok(), Variant::Decimal16(decimal16) => decimal16.try_into().ok(), @@ -1250,7 +1065,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to tuple with an 8-byte unscaled value if possible. /// /// Returns `Some((i64, u8))` for decimal variants where the unscaled value - /// fits in `i64` range, the scale will be 0 if the input is string variants. 
+ /// fits in `i64` range, /// `None` for non-decimal variants or decimal values that would overflow. /// /// # Examples @@ -1266,31 +1081,16 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap()); /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok()); /// - /// // or from string variants if they can be parsed as decimals - /// let v3 = Variant::from("123.45"); - /// assert_eq!(v3.as_decimal8(), VariantDecimal8::try_new(12345, 2).ok()); - /// /// // but not if the value would overflow i64 - /// let v4 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); - /// assert_eq!(v4.as_decimal8(), None); + /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); + /// assert_eq!(v3.as_decimal8(), None); /// /// // or if the variant is not a decimal - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_decimal8(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_decimal8(), None); /// ``` pub fn as_decimal8(&self) -> Option { match *self { - Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { - self.as_num::().and_then(|x| x.try_into().ok()) - } - Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) - .and_then(|x: i64| x.try_into().ok()), - Variant::Double(f) => single_float_to_decimal::(f, 1f64) - .and_then(|x: i64| x.try_into().ok()), - Variant::String(v) => Self::convert_string_to_decimal::(v), - Variant::ShortString(v) => { - Self::convert_string_to_decimal::(v.as_str()) - } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8), Variant::Decimal16(decimal16) => decimal16.try_into().ok(), @@ -1301,7 +1101,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to tuple with a 16-byte unscaled value if possible. 
/// /// Returns `Some((i128, u8))` for decimal variants where the unscaled value - /// fits in `i128` range, the scale will be 0 if the input is string variants. + /// fits in `i128` range, /// `None` for non-decimal variants or decimal values that would overflow. /// /// # Examples @@ -1313,31 +1113,12 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap()); /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok()); /// - /// // or from a string variant if it can be parsed as decimal - /// let v2 = Variant::from("123.45"); - /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 2).ok()); - /// /// // but not if the variant is not a decimal - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_decimal16(), None); + /// let v2 = Variant::from("hello!"); + /// assert_eq!(v2.as_decimal16(), None); /// ``` pub fn as_decimal16(&self) -> Option { match *self { - Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { - let x = self.as_num::()?; - >::from(x).try_into().ok() - } - Variant::Float(f) => { - single_float_to_decimal::(>::from(f), 1f64) - .and_then(|x| x.try_into().ok()) - } - Variant::Double(f) => { - single_float_to_decimal::(f, 1f64).and_then(|x| x.try_into().ok()) - } - Variant::String(v) => Self::convert_string_to_decimal::(v), - Variant::ShortString(v) => { - Self::convert_string_to_decimal::(v.as_str()) - } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8.into()), Variant::Decimal16(decimal16) => Some(decimal16), @@ -1347,9 +1128,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f16` if possible. /// - /// Returns `Some(f16)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `f16` range - /// `None` otherwise. + /// Returns `Some(f16)` for floating point values, `None` otherwise. 
/// /// # Example /// @@ -1365,26 +1144,25 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI))); /// - /// // and from boolean - /// let v3 = Variant::BooleanTrue; - /// assert_eq!(v3.as_f16(), Some(f16::from_f32(1.0))); - /// - /// // return inf if overflow - /// let v4 = Variant::from(123456); - /// assert_eq!(v4.as_f16(), Some(f16::INFINITY)); + /// // but not from integers + /// let v3 = Variant::from(2047); + /// assert_eq!(v3.as_f16(), None); /// - /// // but not from other variants - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_f16(), None); + /// // nor from other variants + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_f16(), None); pub fn as_f16(&self) -> Option { - self.as_num() + match *self { + Variant::Float(i) => Some(f16::from_f32(i)), + Variant::Double(i) => Some(f16::from_f64(i)), + _ => None, + } } /// Converts this variant to an `f32` if possible. /// - /// Returns `Some(f32)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `f32` range - /// `None` otherwise. + /// Returns `Some(f32)` for floating point values (a `Double` is narrowed to `f32`), + /// `None` otherwise.
/// /// # Examples /// @@ -1399,27 +1177,25 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI)); /// - /// // and from boolean variant - /// let v3 = Variant::BooleanTrue; - /// assert_eq!(v3.as_f32(), Some(1.0)); - /// - /// // and return inf if overflow - /// let v4 = Variant::from(f64::MAX); - /// assert_eq!(v4.as_f32(), Some(f32::INFINITY)); + /// // but not from integers + /// let v3 = Variant::from(16777215i64); + /// assert_eq!(v3.as_f32(), None); /// - /// // but not from other variants - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_f32(), None); + /// // or not from other variants + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_f32(), None); /// ``` pub fn as_f32(&self) -> Option { - self.as_num() + match *self { + Variant::Float(i) => Some(i), + Variant::Double(i) => Some(i as f32), + _ => None, + } } /// Converts this variant to an `f64` if possible. /// - /// Returns `Some(f64)` for boolean and numeric variants(integers, floating-point, - /// and decimals with scale 0) that fit in `f64` range - /// `None` for other variants or can't be represented by an f64. + /// Returns `Some(f64)` for floating point values, `None` otherwise. 
/// /// # Examples /// @@ -1434,16 +1210,20 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI)); /// - /// // and from boolean variant - /// let v3 = Variant::BooleanTrue; - /// assert_eq!(v3.as_f64(), Some(1.0f64)); + /// // but not from integer variants + /// let v3 = Variant::from(9007199254740991i64); + /// assert_eq!(v3.as_f64(), None); /// - /// // but not from other variants - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_f64(), None); + /// // or not from other variants + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_f64(), None); /// ``` pub fn as_f64(&self) -> Option { - self.as_num() + match *self { + Variant::Float(i) => Some(i.into()), + Variant::Double(i) => Some(i), + _ => None, + } } /// Converts this variant to an `Object` if it is an [`VariantObject`]. @@ -1690,7 +1470,7 @@ impl From for Variant<'_, '_> { if let Ok(value) = i8::try_from(value) { Variant::Int8(value) } else { - Variant::Int16(num_cast(value).unwrap()) // u8 -> i16 is infallible + Variant::Int16(i16::from(value)) } } } @@ -1701,7 +1481,7 @@ impl From for Variant<'_, '_> { if let Ok(value) = i16::try_from(value) { Variant::Int16(value) } else { - Variant::Int32(num_cast(value).unwrap()) // u16 -> i32 is infallible + Variant::Int32(i32::from(value)) } } } @@ -1711,7 +1491,7 @@ impl From for Variant<'_, '_> { if let Ok(value) = i32::try_from(value) { Variant::Int32(value) } else { - Variant::Int64(num_cast(value).unwrap()) // u32 -> i64 is infallible + Variant::Int64(i64::from(value)) } } } @@ -1723,7 +1503,7 @@ impl From for Variant<'_, '_> { Variant::Int64(value) } else { // u64 max is 18446744073709551615, which fits in i128 - Variant::Decimal16(VariantDecimal16::try_new(num_cast(value).unwrap(), 0).unwrap()) + Variant::Decimal16(VariantDecimal16::try_new(i128::from(value), 0).unwrap()) } } }