diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs index df4d7c2753ef..1de8e62bc41e 100644 --- a/parquet-variant-compute/src/from_json.rs +++ b/parquet-variant-compute/src/from_json.rs @@ -18,7 +18,8 @@ //! Module for transforming a batch of JSON strings into a batch of Variants represented as //! STRUCT -use crate::{VariantArray, VariantArrayBuilder}; +use crate::variant_array::VariantArray; +use crate::variant_array_builder::VariantArrayBuilder; use arrow::array::{Array, ArrayRef, StringArray}; use arrow_schema::ArrowError; use parquet_variant::VariantBuilder; @@ -47,7 +48,7 @@ pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result Vec { + if index >= self.len() || self.is_null(index) { + return vec![]; + } + + let variant = self.value(index); + if let Some(obj) = variant.as_object() { + Vec::from_iter((0..obj.len()).map(|i| obj.field_name(i).unwrap().to_string())) + } else { + vec![] + } + } + + /// Create a new VariantArray with a field removed from all variants + pub fn with_field_removed(&self, field_name: &str) -> Result { + self.with_fields_removed(&[field_name]) + } + + /// Create a new VariantArray with multiple fields removed from all variants + pub fn with_fields_removed(&self, field_names: &[&str]) -> Result { + use parquet_variant::VariantBuilder; + use std::collections::HashSet; + + let fields_to_remove: HashSet<&str> = field_names.iter().copied().collect(); + let mut builder = crate::variant_array_builder::VariantArrayBuilder::new(self.len()); + + for i in 0..self.len() { + if self.is_null(i) { + builder.append_null(); + } else { + let variant = self.value(i); + + // If it's an object, create a new object without the specified fields + if let Some(obj) = variant.as_object() { + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + + // Add all fields except the ones to remove + for (field_name, field_value) in obj.iter() { + if !fields_to_remove.contains(field_name) { + object_builder.insert(field_name, field_value); + } + } + + object_builder.finish().unwrap(); + let (metadata, value) = variant_builder.finish(); + builder.append_variant_buffers(&metadata, &value); + } else { + // Not an object, append as-is + builder.append_variant(variant); + } + } + } + + Ok(builder.build()) + } } impl Array for VariantArray { @@ -224,8 +282,10 @@ impl Array for VariantArray { #[cfg(test)] mod test { use super::*; + use crate::variant_array_builder::VariantArrayBuilder; use arrow::array::{BinaryArray, BinaryViewArray}; use arrow_schema::{Field, Fields}; + use parquet_variant::VariantBuilder; #[test] fn invalid_not_a_struct_array() { @@ -298,6 +358,125 @@ mod test { ); } + fn create_test_variant_array() -> VariantArray { + let mut builder = VariantArrayBuilder::new(2); + + // Create variant 1: {"name": "Alice", "age": 30} + let mut builder1 = VariantBuilder::new(); + builder1 + .new_object() + .with_field("name", "Alice") + .with_field("age", 30i32) + .finish() + .unwrap(); + let (metadata1, value1) = builder1.finish(); + builder.append_variant_buffers(&metadata1, &value1); + + // Create variant 2: {"name": "Bob", "age": 25, "city": "NYC"} + let mut builder2 = VariantBuilder::new(); + builder2 + .new_object() + .with_field("name", "Bob") + .with_field("age", 25i32) + .with_field("city", "NYC") + .finish() + .unwrap(); + let (metadata2, value2) = builder2.finish(); + builder.append_variant_buffers(&metadata2, &value2); + + builder.build() + } + + #[test] + fn test_variant_array_basic() { + let array = create_test_variant_array(); + assert_eq!(array.len(), 2); + assert!(!array.is_empty()); + + // Test accessing variants + let variant1 = array.value(0); + assert_eq!( + variant1.get_object_field("name").unwrap().as_string(), + Some("Alice") + ); + assert_eq!( + variant1.get_object_field("age").unwrap().as_int32(), + Some(30) + ); + + let variant2 = array.value(1); + assert_eq!( + variant2.get_object_field("name").unwrap().as_string(), + Some("Bob") + ); + assert_eq!( + variant2.get_object_field("age").unwrap().as_int32(), + Some(25) + ); + assert_eq!( + variant2.get_object_field("city").unwrap().as_string(), + Some("NYC") + ); + } + + #[test] + fn test_get_field_names() { + let array = create_test_variant_array(); + + let paths1 = array.get_field_names(0); + assert_eq!(paths1.len(), 2); + assert!(paths1.contains(&"name".to_string())); + assert!(paths1.contains(&"age".to_string())); + + let paths2 = array.get_field_names(1); + assert_eq!(paths2.len(), 3); + assert!(paths2.contains(&"name".to_string())); + assert!(paths2.contains(&"age".to_string())); + assert!(paths2.contains(&"city".to_string())); + } + + // Note: test_get_path was removed as it tested the duplicate VariantPath implementation + // Use the official parquet_variant::VariantPath with variant_get functionality instead + + #[test] + fn test_with_field_removed() { + let array = create_test_variant_array(); + + let new_array = array.with_field_removed("age").unwrap(); + + // Check that age field was removed from all variants + let variant1 = new_array.value(0); + let obj1 = variant1.as_object().unwrap(); + assert_eq!(obj1.len(), 1); + assert!(obj1.get("name").is_some()); + assert!(obj1.get("age").is_none()); + + let variant2 = new_array.value(1); + let obj2 = variant2.as_object().unwrap(); + assert_eq!(obj2.len(), 2); + assert!(obj2.get("name").is_some()); + assert!(obj2.get("age").is_none()); + assert!(obj2.get("city").is_some()); + } + + #[test] + fn test_metadata_and_value_fields() { + let array = create_test_variant_array(); + + let metadata_field = array.metadata_field(); + let value_field = array.value_field(); + + // Check that we got the expected arrays + assert_eq!(metadata_field.len(), 2); + assert_eq!(value_field.len(), 2); + + // Check that metadata and value bytes are non-empty + assert!(!metadata_field.as_binary_view().value(0).is_empty()); + assert!(!value_field.as_binary_view().value(0).is_empty()); + assert!(!metadata_field.as_binary_view().value(1).is_empty()); + assert!(!value_field.as_binary_view().value(1).is_empty()); + } + fn make_binary_view_array() -> ArrayRef { Arc::new(BinaryViewArray::from(vec![b"test" as &[u8]])) } diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs index 6bc405c27b06..129fab583416 100644 --- a/parquet-variant-compute/src/variant_array_builder.rs +++ b/parquet-variant-compute/src/variant_array_builder.rs @@ -48,9 +48,10 @@ use std::sync::Arc; /// // append a pre-constructed metadata and value buffers /// let (metadata, value) = { /// let mut vb = VariantBuilder::new(); -/// let mut obj = vb.new_object(); -/// obj.insert("foo", "bar"); -/// obj.finish().unwrap(); +/// vb.new_object() +/// .with_field("foo", "bar") +/// .finish() +/// .unwrap(); /// vb.finish() /// }; /// builder.append_variant_buffers(&metadata, &value); @@ -132,6 +133,11 @@ impl VariantArrayBuilder { VariantArray::try_new(Arc::new(inner)).expect("valid VariantArray by construction") } + /// Finish building the VariantArray (alias for build for compatibility) + pub fn finish(self) -> VariantArray { + self.build() + } + /// Appends a null row to the builder. pub fn append_null(&mut self) { self.nulls.append_null(); diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index b3a3d9e41f13..e3a612288302 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -90,14 +90,17 @@ mod test { use std::sync::Arc; use arrow::array::{Array, ArrayRef, StringArray}; - use parquet_variant::VariantPath; use crate::batch_json_string_to_variant; use crate::VariantArray; use super::{variant_get, GetOptions}; - fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) { + fn single_variant_get_test( + input_json: &str, + path: parquet_variant::VariantPath, + expected_json: &str, + ) { // Create input array from JSON string let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)])); let input_variant_array_ref: ArrayRef = @@ -132,21 +135,25 @@ mod test { fn get_primitive_variant_field() { single_variant_get_test( r#"{"some_field": 1234}"#, - VariantPath::from("some_field"), + parquet_variant::VariantPath::from("some_field"), "1234", ); } #[test] fn get_primitive_variant_list_index() { - single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234"); + single_variant_get_test( + "[1234, 5678]", + parquet_variant::VariantPath::from(0), + "1234", + ); } #[test] fn get_primitive_variant_inside_object_of_object() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from("top_level_field").join("inner_field"), + parquet_variant::VariantPath::from("top_level_field").join("inner_field"), "1234", ); } @@ -155,7 +162,7 @@ mod test { fn get_primitive_variant_inside_list_of_object() { single_variant_get_test( r#"[{"some_field": 1234}]"#, - VariantPath::from(0).join("some_field"), + parquet_variant::VariantPath::from(0).join("some_field"), "1234", ); } @@ -164,7 +171,7 @@ mod test { fn get_primitive_variant_inside_object_of_list() { single_variant_get_test( r#"{"some_field": [1234]}"#, - VariantPath::from("some_field").join(0), + parquet_variant::VariantPath::from("some_field").join(0), "1234", ); } @@ -173,7 +180,7 @@ mod test { fn get_complex_variant() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from("top_level_field"), + parquet_variant::VariantPath::from("top_level_field"), r#"{"inner_field": 1234}"#, ); } diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index dc66865e68ac..5d3d1505ee90 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -305,11 +305,7 @@ impl ValueBuffer { /// /// This method will panic if the variant contains duplicate field names in objects /// when validation is enabled. For a fallible version, use [`ValueBuffer::try_append_variant`] - fn append_variant<'m, 'd>( - &mut self, - variant: Variant<'m, 'd>, - metadata_builder: &mut MetadataBuilder, - ) { + fn append_variant(&mut self, variant: Variant<'_, '_>, metadata_builder: &mut MetadataBuilder) { match variant { Variant::Null => self.append_null(), Variant::BooleanTrue => self.append_bool(true), @@ -335,9 +331,9 @@ impl ValueBuffer { } /// Appends a variant to the buffer - fn try_append_variant<'m, 'd>( + fn try_append_variant( &mut self, - variant: Variant<'m, 'd>, + variant: Variant<'_, '_>, metadata_builder: &mut MetadataBuilder, ) -> Result<(), ArrowError> { match variant { diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index 42dbdb3abc2d..7a94d6f0a859 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -103,7 +103,7 @@ impl<'a> From<&'a str> for VariantPath<'a> { } /// Create from usize -impl<'a> From for VariantPath<'a> { +impl From for VariantPath<'_> { fn from(index: usize) -> Self { VariantPath::new(vec![VariantPathElement::index(index)]) } @@ -152,7 +152,7 @@ impl<'a> From<&'a str> for VariantPathElement<'a> { } } -impl<'a> From for VariantPathElement<'a> { +impl From for VariantPathElement<'_> { fn from(name: String) -> Self { VariantPathElement::field(Cow::Owned(name)) } @@ -164,7 +164,7 @@ impl<'a> From<&'a String> for VariantPathElement<'a> { } } -impl<'a> From for VariantPathElement<'a> { +impl From for VariantPathElement<'_> { fn from(index: usize) -> Self { VariantPathElement::index(index) } diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs index b809fe278cb4..6a006089a6c6 100644 --- a/parquet-variant/src/variant/object.rs +++ b/parquet-variant/src/variant/object.rs @@ -410,7 +410,7 @@ impl<'m, 'v> VariantObject<'m, 'v> { // // Instead of comparing the raw bytes of 2 variant objects, this implementation recursively // checks whether the field values are equal -- regardless of their order -impl<'m, 'v> PartialEq for VariantObject<'m, 'v> { +impl PartialEq for VariantObject<'_, '_> { fn eq(&self, other: &Self) -> bool { if self.num_elements != other.num_elements { return false;