From 0bdb8d68e3427a3b061315ff7a3b823e5a9a163e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Jun 2025 07:56:16 -0400 Subject: [PATCH 1/3] Move variant interop test to Rust integration test --- parquet-variant/src/lib.rs | 5 +---- parquet-variant/src/variant.rs | 8 +++++++- .../{src/test_variant.rs => tests/variant_interop.rs} | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) rename parquet-variant/{src/test_variant.rs => tests/variant_interop.rs} (98%) diff --git a/parquet-variant/src/lib.rs b/parquet-variant/src/lib.rs index a31187daeb69..557271823bc5 100644 --- a/parquet-variant/src/lib.rs +++ b/parquet-variant/src/lib.rs @@ -30,12 +30,9 @@ // TODO: dead code removal #[allow(dead_code)] mod decoder; -// TODO: dead code removal -#[allow(dead_code)] mod variant; // TODO: dead code removal #[allow(dead_code)] mod utils; -#[cfg(test)] -mod test_variant; +pub use variant::*; diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 999826f5c274..b7b1932580b1 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -88,7 +88,7 @@ impl OffsetSizeBytes { } #[derive(Clone, Debug, Copy, PartialEq)] -pub(crate) struct VariantMetadataHeader { +pub struct VariantMetadataHeader { version: u8, is_sorted: bool, /// Note: This is `offset_size_minus_one` + 1 @@ -323,10 +323,16 @@ pub struct VariantArray<'m, 'v> { } impl<'m, 'v> VariantArray<'m, 'v> { + /// Return the length of this array pub fn len(&self) -> usize { todo!() } + /// Is the array of zero length + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + pub fn values(&self) -> Result<impl Iterator<Item = Variant<'m, 'v>>, ArrowError> { todo!(); #[allow(unreachable_code)] // Just to infer the return type diff --git a/parquet-variant/src/test_variant.rs b/parquet-variant/tests/variant_interop.rs similarity index 98% rename from parquet-variant/src/test_variant.rs rename to parquet-variant/tests/variant_interop.rs index 07c9eaf9c6f0..1f82afdc2ef0 100644 ---
a/parquet-variant/src/test_variant.rs +++ b/parquet-variant/tests/variant_interop.rs @@ -23,8 +23,8 @@ use std::fs; use std::path::{Path, PathBuf}; -use crate::variant::{Variant, VariantMetadata}; use arrow_schema::ArrowError; +use parquet_variant::{Variant, VariantMetadata}; fn cases_dir() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")) From 0045a92dbfb9fe227981dc6887cadd1e457be6c9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 8 Jun 2025 13:28:51 -0400 Subject: [PATCH 2/3] update comment --- parquet-testing | 2 +- parquet-variant/tests/variant_interop.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet-testing b/parquet-testing index 2dc8bf140ed6..b68bea40fed8 160000 --- a/parquet-testing +++ b/parquet-testing @@ -1 +1 @@ -Subproject commit 2dc8bf140ed6e28652fc347211c7d661714c7f95 +Subproject commit b68bea40fed8d1a780a9e09dd2262017e04b19ad diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs index 1f82afdc2ef0..22b45a4c000f 100644 --- a/parquet-variant/tests/variant_interop.rs +++ b/parquet-variant/tests/variant_interop.rs @@ -49,7 +49,7 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> { ("primitive_string", Variant::from("This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")), // Using the From trait ("short_string", Variant::from("Less than 64 bytes (❤\u{fe0f} with utf8)")), - // TODO Reenable when https://github.com/apache/parquet-testing/issues/81 is fixed + // TODO Reenable when https://github.com/apache/parquet-testing/pull/86 is merged // ("primitive_null", Variant::Null), ] } From 971a8e4c6f957d7c2be8c62346c17328895f8afb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 9 Jun 2025 09:03:48 -0400 Subject: [PATCH 3/3] Test null --- parquet-variant/tests/variant_interop.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 
deletions(-) diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs index 22b45a4c000f..33bbbd5f5e15 100644 --- a/parquet-variant/tests/variant_interop.rs +++ b/parquet-variant/tests/variant_interop.rs @@ -42,15 +42,14 @@ fn load_case(name: &str) -> Result<(Vec<u8>, Vec<u8>), ArrowError> { fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> { vec![ - ("primitive_boolean_false", Variant::BooleanFalse), - ("primitive_boolean_true", Variant::BooleanTrue), - ("primitive_int8", Variant::Int8(42)), - // Using the From trait - ("primitive_string", Variant::from("This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")), - // Using the From trait - ("short_string", Variant::from("Less than 64 bytes (❤\u{fe0f} with utf8)")), - // TODO Reenable when https://github.com/apache/parquet-testing/pull/86 is merged - // ("primitive_null", Variant::Null), + ("primitive_null", Variant::Null), + ("primitive_boolean_false", Variant::BooleanFalse), + ("primitive_boolean_true", Variant::BooleanTrue), + ("primitive_int8", Variant::Int8(42)), + // Using the From trait + ("primitive_string", Variant::from("This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")), + // Using the From trait + ("short_string", Variant::from("Less than 64 bytes (❤\u{fe0f} with utf8)")), ] }