From c4cd8d0df6d0efd6cd041b9eab69a2e43e772ddd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Jul 2025 14:49:00 -0400 Subject: [PATCH 1/4] [Variant] Introduce parquet-variant-json crate --- Cargo.toml | 1 + parquet-variant-json/Cargo.toml | 50 +++++++++++++++++++++++++++++++++ parquet-variant-json/src/lib.rs | 0 3 files changed, 51 insertions(+) create mode 100644 parquet-variant-json/Cargo.toml create mode 100644 parquet-variant-json/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index a9b00f9537dc..4d103e78b6cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ members = [ "arrow-string", "parquet", "parquet-variant", + "parquet-variant-json", "parquet_derive", "parquet_derive_test", ] diff --git a/parquet-variant-json/Cargo.toml b/parquet-variant-json/Cargo.toml new file mode 100644 index 000000000000..d8e1ef7cde3d --- /dev/null +++ b/parquet-variant-json/Cargo.toml @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "parquet-variant-json" +# This package is still in development and thus the version does +# not follow the versions of the rest of the crates in this repo. 
+version = "0.1.0" +license = { workspace = true } +description = "Apache Parquet Variant to/from JSON" +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +keywords = ["arrow", "parquet", "variant"] +readme = "README.md" +edition = { workspace = true } +# needs a newer version than workspace due to +# error: `Option::<T>::unwrap` is not yet stable as a const fn +rust-version = "1.83" + + +[dependencies] +arrow-schema = { workspace = true } +parquet-variant = { path = "../parquet-variant" } +chrono = { workspace = true } +serde_json = "1.0" +base64 = "0.22" +indexmap = "2.10.0" + + +[lib] +name = "parquet_variant_json" +bench = false + +[dev-dependencies] + diff --git a/parquet-variant-json/src/lib.rs b/parquet-variant-json/src/lib.rs new file mode 100644 index 000000000000..e69de29bb2d1 From 2d959ec41ccacf8faaa9c82c11c425ea173f25f8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Jul 2025 14:54:58 -0400 Subject: [PATCH 2/4] update --- .../src/from_json.rs | 1 - parquet-variant-json/src/lib.rs | 38 ++++++++++++++ .../src/to_json.rs | 25 +++++----- .../examples/variant_from_json_examples.rs | 50 ------------------- parquet-variant/src/lib.rs | 4 -- 5 files changed, 50 insertions(+), 68 deletions(-) rename {parquet-variant => parquet-variant-json}/src/from_json.rs (98%) rename {parquet-variant => parquet-variant-json}/src/to_json.rs (98%) delete mode 100644 parquet-variant/examples/variant_from_json_examples.rs diff --git a/parquet-variant/src/from_json.rs b/parquet-variant-json/src/from_json.rs similarity index 98% rename from parquet-variant/src/from_json.rs rename to parquet-variant-json/src/from_json.rs index 00d205f38584..ab632e848ffb 100644 --- a/parquet-variant/src/from_json.rs +++ b/parquet-variant-json/src/from_json.rs @@ -17,7 +17,6 @@ //! 
Module for parsing JSON strings as Variant -use crate::{ListBuilder, ObjectBuilder, Variant, VariantBuilder, VariantBuilderExt}; use arrow_schema::ArrowError; use serde_json::{Number, Value}; diff --git a/parquet-variant-json/src/lib.rs b/parquet-variant-json/src/lib.rs index e69de29bb2d1..bb774c05c135 100644 --- a/parquet-variant-json/src/lib.rs +++ b/parquet-variant-json/src/lib.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Conversion between [JSON] and the [Variant Binary Encoding] from [Apache Parquet]. +//! +//! [JSON]: https://www.json.org/json-en.html +//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md +//! [Apache Parquet]: https://parquet.apache.org/ +//! +//! * See [`json_to_variant`] for converting a JSON string to a Variant. +//! * See [`variant_to_json`] for converting a Variant to a JSON string. +//! +//! ## 🚧 Work In Progress +//! +//! This crate is under active development and is not yet ready for production use. +//! If you are interested in helping, you can find more information on the GitHub [Variant issue] +//! +//! 
[Variant issue]: https://github.com/apache/arrow-rs/issues/6736 + +mod from_json; +mod to_json; + +pub use from_json::json_to_variant; +pub use to_json::{variant_to_json, variant_to_json_string, variant_to_json_value}; diff --git a/parquet-variant/src/to_json.rs b/parquet-variant-json/src/to_json.rs similarity index 98% rename from parquet-variant/src/to_json.rs rename to parquet-variant-json/src/to_json.rs index b27fca6108d2..aa66b6801b32 100644 --- a/parquet-variant/src/to_json.rs +++ b/parquet-variant-json/src/to_json.rs @@ -21,7 +21,7 @@ use base64::{engine::general_purpose, Engine as _}; use serde_json::Value; use std::io::Write; -use crate::variant::{Variant, VariantList, VariantObject}; +use parquet_variant::variant::{Variant, VariantList, VariantObject}; // Format string constants to avoid duplication and reduce errors const DATE_FORMAT: &str = "%Y-%m-%d"; @@ -366,7 +366,6 @@ pub fn variant_to_json_value(variant: &Variant) -> Result { #[cfg(test)] mod tests { use super::*; - use crate::{Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; use chrono::{DateTime, NaiveDate, Utc}; #[test] @@ -490,7 +489,7 @@ mod tests { #[test] fn test_short_string_to_json() -> Result<(), ArrowError> { - use crate::variant::ShortString; + use parquet_variant::ShortString; let short_string = ShortString::try_new("short")?; let variant = Variant::ShortString(short_string); let json = variant_to_json_string(&variant)?; @@ -598,7 +597,7 @@ mod tests { #[test] fn test_primitive_json_conversion() { - use crate::variant::ShortString; + use parquet_variant::ShortString; // Null JsonTest { @@ -848,7 +847,7 @@ mod tests { #[test] fn test_simple_object_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; // Create a simple object with various field types let mut builder = VariantBuilder::new(); @@ -884,7 +883,7 @@ mod tests { #[test] fn test_empty_object_to_json() -> Result<(), ArrowError> { - use 
crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -906,7 +905,7 @@ mod tests { #[test] fn test_object_with_special_characters_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -936,7 +935,7 @@ mod tests { #[test] fn test_simple_list_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -966,7 +965,7 @@ mod tests { #[test] fn test_empty_list_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -988,7 +987,7 @@ mod tests { #[test] fn test_mixed_type_list_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -1020,7 +1019,7 @@ mod tests { #[test] fn test_object_field_ordering_in_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -1050,7 +1049,7 @@ mod tests { #[test] fn test_list_with_various_primitive_types_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); @@ -1086,7 +1085,7 @@ mod tests { #[test] fn test_object_with_various_primitive_types_to_json() -> Result<(), ArrowError> { - use crate::builder::VariantBuilder; + use parquet_variant::VariantBuilder; let mut builder = VariantBuilder::new(); diff --git a/parquet-variant/examples/variant_from_json_examples.rs b/parquet-variant/examples/variant_from_json_examples.rs deleted file mode 100644 index e8a8a9d24959..000000000000 --- a/parquet-variant/examples/variant_from_json_examples.rs +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to 
the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Example showing how to convert Variant values to JSON - -use parquet_variant::{ - json_to_variant, variant_to_json, variant_to_json_string, variant_to_json_value, VariantBuilder, -}; - -fn main() -> Result<(), Box> { - let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string() - + "\"email\":\"alice@example.com\", \"is_active\": true, \"score\": 95.7," - + "\"additional_info\": null}"; - - let mut variant_builder = VariantBuilder::new(); - json_to_variant(&person_string, &mut variant_builder)?; - - let (metadata, value) = variant_builder.finish(); - - let variant = parquet_variant::Variant::try_new(&metadata, &value)?; - - let json_result = variant_to_json_string(&variant)?; - let json_value = variant_to_json_value(&variant)?; - let pretty_json = serde_json::to_string_pretty(&json_value)?; - println!("{pretty_json}"); - - let mut buffer = Vec::new(); - variant_to_json(&mut buffer, &variant)?; - let buffer_result = String::from_utf8(buffer)?; - assert_eq!(json_result, "{\"additional_info\":null,\"age\":30,".to_string() + - "\"email\":\"alice@example.com\",\"is_active\":true,\"name\":\"Alice\",\"score\":95.7}"); - assert_eq!(json_result, buffer_result); - 
assert_eq!(json_result, serde_json::to_string(&json_value)?); - - Ok(()) -} diff --git a/parquet-variant/src/lib.rs b/parquet-variant/src/lib.rs index 7dbfff52b1b5..00a8a69aff99 100644 --- a/parquet-variant/src/lib.rs +++ b/parquet-variant/src/lib.rs @@ -33,12 +33,8 @@ mod decoder; mod variant; // TODO: dead code removal mod builder; -mod from_json; -mod to_json; #[allow(dead_code)] mod utils; pub use builder::*; -pub use from_json::json_to_variant; -pub use to_json::{variant_to_json, variant_to_json_string, variant_to_json_value}; pub use variant::*; From 0b5d108ba4420acd702a0d9438fa564db5eb67f9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Jul 2025 15:07:36 -0400 Subject: [PATCH 3/4] Update dependencies, move code --- .github/workflows/parquet-variant.yml | 8 +- Cargo.toml | 4 + parquet-variant-json/src/from_json.rs | 546 ++++++++++++++++- parquet-variant-json/src/to_json.rs | 18 +- parquet-variant/src/builder.rs | 8 +- parquet-variant/tests/test_json_to_variant.rs | 552 ------------------ 6 files changed, 571 insertions(+), 565 deletions(-) delete mode 100644 parquet-variant/tests/test_json_to_variant.rs diff --git a/.github/workflows/parquet-variant.yml b/.github/workflows/parquet-variant.yml index 6fc5c3a8cd00..6ad4e86be422 100644 --- a/.github/workflows/parquet-variant.yml +++ b/.github/workflows/parquet-variant.yml @@ -46,8 +46,10 @@ jobs: submodules: true - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - - name: Test + - name: Test parquet-variant run: cargo test -p parquet-variant + - name: Test parquet-variant-json + run: cargo test -p parquet-variant-json # test compilation linux-features: @@ -63,6 +65,8 @@ jobs: uses: ./.github/actions/setup-builder - name: Check compilation run: cargo check -p parquet-variant + - name: Check compilation + run: cargo check -p parquet-variant-json clippy: name: Clippy @@ -77,3 +81,5 @@ jobs: run: rustup component add clippy - name: Run clippy run: cargo clippy -p parquet-variant --all-targets 
--all-features -- -D warnings + - name: Run clippy + run: cargo clippy -p parquet-variant-json --all-targets --all-features -- -D warnings diff --git a/Cargo.toml b/Cargo.toml index 4d103e78b6cb..ca88a9fd81fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,6 +97,10 @@ arrow-select = { version = "55.2.0", path = "./arrow-select" } arrow-string = { version = "55.2.0", path = "./arrow-string" } parquet = { version = "55.2.0", path = "./parquet", default-features = false } +# These crates have not yet been released and thus do not use the workspace version +parquet-variant = { version = "0.1.0", path = "./parquet-variant"} +parquet-variant-json = { version = "0.1.0", path = "./parquet-variant-json" } + chrono = { version = "0.4.40", default-features = false, features = ["clock"] } # release inherited profile keeping debug information and symbols diff --git a/parquet-variant-json/src/from_json.rs b/parquet-variant-json/src/from_json.rs index ab632e848ffb..c2d923557b61 100644 --- a/parquet-variant-json/src/from_json.rs +++ b/parquet-variant-json/src/from_json.rs @@ -18,6 +18,7 @@ //! Module for parsing JSON strings as Variant use arrow_schema::ArrowError; +use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilder, VariantBuilderExt}; use serde_json::{Number, Value}; /// Converts a JSON string to Variant using [`VariantBuilder`]. 
The resulting `value` and `metadata` @@ -34,9 +35,10 @@ use serde_json::{Number, Value}; /// * `Err` with error details if the conversion fails /// /// ```rust -/// # use parquet_variant::{ -/// json_to_variant, variant_to_json, variant_to_json_string, variant_to_json_value, VariantBuilder -/// }; +/// # use parquet_variant::VariantBuilder; +/// # use parquet_variant_json::{ +/// # json_to_variant, variant_to_json_string, variant_to_json, variant_to_json_value +/// # }; /// /// let mut variant_builder = VariantBuilder::new(); /// let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string() @@ -148,3 +150,541 @@ impl<'m, 'v> VariantBuilderExt<'m, 'v> for ObjectFieldBuilder<'_, '_, '_> { self.builder.new_object(self.key) } } + +#[cfg(test)] +mod test { + use super::*; + use crate::variant_to_json_string; + use arrow_schema::ArrowError; + use parquet_variant::{ + ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, + }; + + struct JsonToVariantTest<'a> { + json: &'a str, + expected: Variant<'a, 'a>, + } + + impl<'a> JsonToVariantTest<'a> { + fn run(self) -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + json_to_variant(self.json, &mut variant_builder)?; + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + assert_eq!(variant, self.expected); + Ok(()) + } + } + + #[test] + fn test_json_to_variant_null() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "null", + expected: Variant::Null, + } + .run() + } + + #[test] + fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "true", + expected: Variant::BooleanTrue, + } + .run() + } + + #[test] + fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "false", + expected: Variant::BooleanFalse, + } + .run() + } + + #[test] + fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> { + 
JsonToVariantTest { + json: " 127 ", + expected: Variant::Int8(127), + } + .run() + } + + #[test] + fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " -128 ", + expected: Variant::Int8(-128), + } + .run() + } + + #[test] + fn test_json_to_variant_int16() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " 27134 ", + expected: Variant::Int16(27134), + } + .run() + } + + #[test] + fn test_json_to_variant_int32() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " -32767431 ", + expected: Variant::Int32(-32767431), + } + .run() + } + + #[test] + fn test_json_to_variant_int64() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "92842754201389", + expected: Variant::Int64(92842754201389), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "1.23", + expected: Variant::from(VariantDecimal4::try_new(123, 2)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "99999999.9", + expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-99999999.9", + expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.999999999", + expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.000000001", + expected: Variant::from(VariantDecimal4::try_new(1, 9)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal4_small_negative() 
-> Result<(), ArrowError> { + JsonToVariantTest { + json: "-0.999999999", + expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "999999999.0", + expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-999999999.0", + expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.999999999999999999", + expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "9999999999999999.99", + expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-9999999999999999.99", + expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "9999999999999999999", // integer larger than i64 + expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.9999999999999999999", + expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal16_max_value() 
-> Result<(), ArrowError> { + JsonToVariantTest { + json: "79228162514264337593543950335", // 2 ^ 96 - 1 + expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?), + } + .run() + } + + #[ignore] + #[test] + fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "7.9228162514264337593543950335", // using scale higher than this falls into double + // since the max scale is 28. + expected: Variant::from(VariantDecimal16::try_new( + 79228162514264337593543950335, + 28, + )?), + } + .run() + } + + #[test] + fn test_json_to_variant_double_precision() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.79228162514264337593543950335", + expected: Variant::Double(0.792_281_625_142_643_4_f64), + } + .run() + } + + #[test] + fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "15e-1", + expected: Variant::Double(15e-1f64), + } + .run() + } + + #[test] + fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-15e-1", + expected: Variant::Double(-15e-1f64), + } + .run() + } + + #[test] + fn test_json_to_variant_short_string() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "\"harsh\"", + expected: Variant::ShortString(ShortString::try_new("harsh")?), + } + .run() + } + + #[test] + fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> { + JsonToVariantTest { + json: &format!("\"{}\"", "a".repeat(63)), + expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?), + } + .run() + } + + #[test] + fn test_json_to_variant_long_string() -> Result<(), ArrowError> { + JsonToVariantTest { + json: &format!("\"{}\"", "a".repeat(64)), + expected: Variant::String(&"a".repeat(64)), + } + .run() + } + + #[test] + fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> { + JsonToVariantTest { + json: &format!("\"{}\"", "b".repeat(100000)), 
+ expected: Variant::String(&"b".repeat(100000)), + } + .run() + } + + #[test] + fn test_json_to_variant_array_simple() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + list_builder.append_value(Variant::Int8(127)); + list_builder.append_value(Variant::Int16(128)); + list_builder.append_value(Variant::Int32(-32767431)); + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: "[127, 128, -32767431]", + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + let mut object_builder_inner = list_builder.new_object(); + object_builder_inner.insert("age", Variant::Int8(32)); + object_builder_inner.finish().unwrap(); + list_builder.append_value(Variant::Int16(128)); + list_builder.append_value(Variant::BooleanFalse); + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: "[{\"age\": 32}, 128, false]", + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> { + // u16 offset - 128 i8's + 1 "true" = 257 bytes + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + for _ in 0..128 { + list_builder.append_value(Variant::Int8(1)); + } + list_builder.append_value(Variant::BooleanTrue); + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: &format!("[{} true]", "1, ".repeat(128)), + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_array_nested_large() -> Result<(), 
ArrowError> { + // verify u24, and large_size + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + for _ in 0..256 { + let mut list_builder_inner = list_builder.new_list(); + for _ in 0..255 { + list_builder_inner.append_value(Variant::Null); + } + list_builder_inner.finish(); + } + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + let intermediate = format!("[{}]", vec!["null"; 255].join(", ")); + let json = format!("[{}]", vec![intermediate; 256].join(", ")); + JsonToVariantTest { + json: json.as_str(), + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_object_simple() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + object_builder.insert("a", Variant::Int8(3)); + object_builder.insert("b", Variant::Int8(2)); + object_builder.finish().unwrap(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + JsonToVariantTest { + json: "{\"b\": 2, \"a\": 1, \"a\": 3}", + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_object_complex() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + let mut inner_list_builder = object_builder.new_list("booleans"); + inner_list_builder.append_value(Variant::BooleanTrue); + inner_list_builder.append_value(Variant::BooleanFalse); + inner_list_builder.finish(); + object_builder.insert("null", Variant::Null); + let mut inner_list_builder = object_builder.new_list("numbers"); + inner_list_builder.append_value(Variant::Int8(4)); + inner_list_builder.append_value(Variant::Double(-3e0)); + inner_list_builder.append_value(Variant::Double(1001e-3)); + inner_list_builder.finish(); + object_builder.finish().unwrap(); + let (metadata, value) = 
variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + JsonToVariantTest { + json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}", + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> { + // 256 elements (keys: 000-255) - each element is an object of 256 elements (240-495) - each + // element a list of numbers from 0-127 + let keys: Vec = (0..=255).map(|n| format!("{n:03}")).collect(); + let innermost_list: String = format!( + "[{}]", + (0..=127) + .map(|n| format!("{n}")) + .collect::>() + .join(",") + ); + let inner_keys: Vec = (240..=495).map(|n| format!("{n}")).collect(); + let inner_object = format!( + "{{{}:{}}}", + inner_keys + .iter() + .map(|k| format!("\"{k}\"")) + .collect::>() + .join(format!(":{innermost_list},").as_str()), + innermost_list + ); + let json = format!( + "{{{}:{}}}", + keys.iter() + .map(|k| format!("\"{k}\"")) + .collect::>() + .join(format!(":{inner_object},").as_str()), + inner_object + ); + // Manually verify raw JSON value size + let mut variant_builder = VariantBuilder::new(); + json_to_variant(&json, &mut variant_builder)?; + let (metadata, value) = variant_builder.finish(); + let v = Variant::try_new(&metadata, &value)?; + let output_string = variant_to_json_string(&v)?; + assert_eq!(output_string, json); + // Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496 + assert_eq!(metadata.len(), 2485); + // Verify value size. 
+ // Size of innermost_list: 1 + 1 + 258 + 256 = 516 + // Size of inner object: 1 + 4 + 256 + 257 * 3 + 256 * 516 = 133128 + // Size of json: 1 + 4 + 512 + 1028 + 256 * 133128 = 34082313 + assert_eq!(value.len(), 34082313); + + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + keys.iter().for_each(|key| { + let mut inner_object_builder = object_builder.new_object(key); + inner_keys.iter().for_each(|inner_key| { + let mut list_builder = inner_object_builder.new_list(inner_key); + for i in 0..=127 { + list_builder.append_value(Variant::Int8(i)); + } + list_builder.finish(); + }); + inner_object_builder.finish().unwrap(); + }); + object_builder.finish().unwrap(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: &json, + expected: variant, + } + .run() + } + + #[test] + fn test_json_to_variant_unicode() -> Result<(), ArrowError> { + let json = "{\"爱\":\"अ\",\"a\":1}"; + let mut variant_builder = VariantBuilder::new(); + json_to_variant(json, &mut variant_builder)?; + let (metadata, value) = variant_builder.finish(); + let v = Variant::try_new(&metadata, &value)?; + let output_string = variant_to_json_string(&v)?; + assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}"); + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + object_builder.insert("a", Variant::Int8(1)); + object_builder.insert("爱", Variant::ShortString(ShortString::try_new("अ")?)); + object_builder.finish().unwrap(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + assert_eq!( + value, + &[2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8] + ); + assert_eq!( + metadata, + &[1u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8] + ); + JsonToVariantTest { + json, + expected: variant, + } + .run() + } +} diff --git 
a/parquet-variant-json/src/to_json.rs b/parquet-variant-json/src/to_json.rs index aa66b6801b32..55e024a66c4a 100644 --- a/parquet-variant-json/src/to_json.rs +++ b/parquet-variant-json/src/to_json.rs @@ -21,7 +21,7 @@ use base64::{engine::general_purpose, Engine as _}; use serde_json::Value; use std::io::Write; -use parquet_variant::variant::{Variant, VariantList, VariantObject}; +use parquet_variant::{Variant, VariantList, VariantObject}; // Format string constants to avoid duplication and reduce errors const DATE_FORMAT: &str = "%Y-%m-%d"; @@ -61,7 +61,8 @@ fn format_binary_base64(bytes: &[u8]) -> String { /// /// /// ```rust -/// # use parquet_variant::{Variant, variant_to_json}; +/// # use parquet_variant::{Variant}; +/// # use parquet_variant_json::variant_to_json; /// # use arrow_schema::ArrowError; /// let variant = Variant::from("Hello, World!"); /// let mut buffer = Vec::new(); @@ -72,7 +73,8 @@ fn format_binary_base64(bytes: &[u8]) -> String { /// /// # Example: Create a [`Variant::Object`] and convert to JSON /// ```rust -/// # use parquet_variant::{Variant, VariantBuilder, variant_to_json}; +/// # use parquet_variant::{Variant, VariantBuilder}; +/// # use parquet_variant_json::variant_to_json; /// # use arrow_schema::ArrowError; /// let mut builder = VariantBuilder::new(); /// // Create an object builder that will write fields to the object @@ -203,7 +205,8 @@ fn convert_array_to_json(buffer: &mut impl Write, arr: &VariantList) -> Result<( /// # Examples /// /// ```rust -/// # use parquet_variant::{Variant, variant_to_json_string}; +/// # use parquet_variant::{Variant}; +/// # use parquet_variant_json::variant_to_json_string; /// # use arrow_schema::ArrowError; /// let variant = Variant::Int32(42); /// let json = variant_to_json_string(&variant)?; @@ -222,7 +225,8 @@ fn convert_array_to_json(buffer: &mut impl Write, arr: &VariantList) -> Result<( /// ``` /// /// ```rust -/// # use parquet_variant::{Variant, VariantBuilder, variant_to_json_string}; +/// 
# use parquet_variant::{Variant, VariantBuilder}; +/// # use parquet_variant_json::variant_to_json_string; /// # use arrow_schema::ArrowError; /// let mut builder = VariantBuilder::new(); /// // Create an object builder that will write fields to the object @@ -263,7 +267,8 @@ pub fn variant_to_json_string(variant: &Variant) -> Result { /// # Examples /// /// ```rust -/// # use parquet_variant::{Variant, variant_to_json_value}; +/// # use parquet_variant::{Variant}; +/// # use parquet_variant_json::variant_to_json_value; /// # use serde_json::Value; /// # use arrow_schema::ArrowError; /// let variant = Variant::from("hello"); @@ -367,6 +372,7 @@ pub fn variant_to_json_value(variant: &Variant) -> Result { mod tests { use super::*; use chrono::{DateTime, NaiveDate, Utc}; + use parquet_variant::{VariantDecimal16, VariantDecimal4, VariantDecimal8}; #[test] fn test_decimal_edge_cases() -> Result<(), ArrowError> { diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index fe3090f70b8f..2d21dde6c471 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -756,9 +756,11 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { } } -/// Trait that abstracts functionality from Variant fconstruction implementations, namely -/// `VariantBuilder`, `ListBuilder` and `ObjectFieldBuilder` to minimize code duplication. -pub(crate) trait VariantBuilderExt<'m, 'v> { +/// Extends [`VariantBuilder`] to help build nested [`Variant`]s +/// +/// Allows users to append values to a [`VariantBuilder`], [`ListBuilder`] or +/// [`ObjectBuilder`] using the same interface.
+pub trait VariantBuilderExt<'m, 'v> { fn append_value(&mut self, value: impl Into<Variant<'m, 'v>>); fn new_list(&mut self) -> ListBuilder; diff --git a/parquet-variant/tests/test_json_to_variant.rs b/parquet-variant/tests/test_json_to_variant.rs deleted file mode 100644 index fd6056d02d9c..000000000000 --- a/parquet-variant/tests/test_json_to_variant.rs +++ /dev/null @@ -1,552 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Manually tests if parsing JSON strings to Variants returns the expected results.
- -use arrow_schema::ArrowError; -use parquet_variant::{ - json_to_variant, variant_to_json_string, ShortString, Variant, VariantBuilder, - VariantDecimal16, VariantDecimal4, VariantDecimal8, -}; - -struct JsonToVariantTest<'a> { - json: &'a str, - expected: Variant<'a, 'a>, -} - -impl<'a> JsonToVariantTest<'a> { - fn run(self) -> Result<(), ArrowError> { - let mut variant_builder = VariantBuilder::new(); - json_to_variant(self.json, &mut variant_builder)?; - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - assert_eq!(variant, self.expected); - Ok(()) - } -} - -#[test] -fn test_json_to_variant_null() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "null", - expected: Variant::Null, - } - .run() -} - -#[test] -fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "true", - expected: Variant::BooleanTrue, - } - .run() -} - -#[test] -fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "false", - expected: Variant::BooleanFalse, - } - .run() -} - -#[test] -fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> { - JsonToVariantTest { - json: " 127 ", - expected: Variant::Int8(127), - } - .run() -} - -#[test] -fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> { - JsonToVariantTest { - json: " -128 ", - expected: Variant::Int8(-128), - } - .run() -} - -#[test] -fn test_json_to_variant_int16() -> Result<(), ArrowError> { - JsonToVariantTest { - json: " 27134 ", - expected: Variant::Int16(27134), - } - .run() -} - -#[test] -fn test_json_to_variant_int32() -> Result<(), ArrowError> { - JsonToVariantTest { - json: " -32767431 ", - expected: Variant::Int32(-32767431), - } - .run() -} - -#[test] -fn test_json_to_variant_int64() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "92842754201389", - expected: Variant::Int64(92842754201389), - } - .run() -} - -#[ignore] -#[test] -fn 
test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "1.23", - expected: Variant::from(VariantDecimal4::try_new(123, 2)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "99999999.9", - expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "-99999999.9", - expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "0.999999999", - expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "0.000000001", - expected: Variant::from(VariantDecimal4::try_new(1, 9)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "-0.999999999", - expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "999999999.0", - expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "-999999999.0", - expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "0.999999999999999999", - expected: 
Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "9999999999999999.99", - expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "-9999999999999999.99", - expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "9999999999999999999", // integer larger than i64 - expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "0.9999999999999999999", - expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "79228162514264337593543950335", // 2 ^ 96 - 1 - expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?), - } - .run() -} - -#[ignore] -#[test] -fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "7.9228162514264337593543950335", // using scale higher than this falls into double - // since the max scale is 28. 
- expected: Variant::from(VariantDecimal16::try_new( - 79228162514264337593543950335, - 28, - )?), - } - .run() -} - -#[test] -fn test_json_to_variant_double_precision() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "0.79228162514264337593543950335", - expected: Variant::Double(0.792_281_625_142_643_4_f64), - } - .run() -} - -#[test] -fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "15e-1", - expected: Variant::Double(15e-1f64), - } - .run() -} - -#[test] -fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "-15e-1", - expected: Variant::Double(-15e-1f64), - } - .run() -} - -#[test] -fn test_json_to_variant_short_string() -> Result<(), ArrowError> { - JsonToVariantTest { - json: "\"harsh\"", - expected: Variant::ShortString(ShortString::try_new("harsh")?), - } - .run() -} - -#[test] -fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> { - JsonToVariantTest { - json: &format!("\"{}\"", "a".repeat(63)), - expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?), - } - .run() -} - -#[test] -fn test_json_to_variant_long_string() -> Result<(), ArrowError> { - JsonToVariantTest { - json: &format!("\"{}\"", "a".repeat(64)), - expected: Variant::String(&"a".repeat(64)), - } - .run() -} - -#[test] -fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> { - JsonToVariantTest { - json: &format!("\"{}\"", "b".repeat(100000)), - expected: Variant::String(&"b".repeat(100000)), - } - .run() -} - -#[test] -fn test_json_to_variant_array_simple() -> Result<(), ArrowError> { - let mut variant_builder = VariantBuilder::new(); - let mut list_builder = variant_builder.new_list(); - list_builder.append_value(Variant::Int8(127)); - list_builder.append_value(Variant::Int16(128)); - list_builder.append_value(Variant::Int32(-32767431)); - list_builder.finish(); - let (metadata, value) = 
variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - - JsonToVariantTest { - json: "[127, 128, -32767431]", - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> { - let mut variant_builder = VariantBuilder::new(); - let mut list_builder = variant_builder.new_list(); - let mut object_builder_inner = list_builder.new_object(); - object_builder_inner.insert("age", Variant::Int8(32)); - object_builder_inner.finish().unwrap(); - list_builder.append_value(Variant::Int16(128)); - list_builder.append_value(Variant::BooleanFalse); - list_builder.finish(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - - JsonToVariantTest { - json: "[{\"age\": 32}, 128, false]", - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> { - // u16 offset - 128 i8's + 1 "true" = 257 bytes - let mut variant_builder = VariantBuilder::new(); - let mut list_builder = variant_builder.new_list(); - for _ in 0..128 { - list_builder.append_value(Variant::Int8(1)); - } - list_builder.append_value(Variant::BooleanTrue); - list_builder.finish(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - - JsonToVariantTest { - json: &format!("[{} true]", "1, ".repeat(128)), - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> { - // verify u24, and large_size - let mut variant_builder = VariantBuilder::new(); - let mut list_builder = variant_builder.new_list(); - for _ in 0..256 { - let mut list_builder_inner = list_builder.new_list(); - for _ in 0..255 { - list_builder_inner.append_value(Variant::Null); - } - list_builder_inner.finish(); - } - list_builder.finish(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - 
let intermediate = format!("[{}]", vec!["null"; 255].join(", ")); - let json = format!("[{}]", vec![intermediate; 256].join(", ")); - JsonToVariantTest { - json: json.as_str(), - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_object_simple() -> Result<(), ArrowError> { - let mut variant_builder = VariantBuilder::new(); - let mut object_builder = variant_builder.new_object(); - object_builder.insert("a", Variant::Int8(3)); - object_builder.insert("b", Variant::Int8(2)); - object_builder.finish().unwrap(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - JsonToVariantTest { - json: "{\"b\": 2, \"a\": 1, \"a\": 3}", - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_object_complex() -> Result<(), ArrowError> { - let mut variant_builder = VariantBuilder::new(); - let mut object_builder = variant_builder.new_object(); - let mut inner_list_builder = object_builder.new_list("booleans"); - inner_list_builder.append_value(Variant::BooleanTrue); - inner_list_builder.append_value(Variant::BooleanFalse); - inner_list_builder.finish(); - object_builder.insert("null", Variant::Null); - let mut inner_list_builder = object_builder.new_list("numbers"); - inner_list_builder.append_value(Variant::Int8(4)); - inner_list_builder.append_value(Variant::Double(-3e0)); - inner_list_builder.append_value(Variant::Double(1001e-3)); - inner_list_builder.finish(); - object_builder.finish().unwrap(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - JsonToVariantTest { - json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}", - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> { - // 256 elements (keys: 000-255) - each element is an object of 256 elements (240-495) - each - // element a list of numbers from 0-127 - let keys: Vec = 
(0..=255).map(|n| format!("{n:03}")).collect(); - let innermost_list: String = format!( - "[{}]", - (0..=127) - .map(|n| format!("{n}")) - .collect::>() - .join(",") - ); - let inner_keys: Vec = (240..=495).map(|n| format!("{n}")).collect(); - let inner_object = format!( - "{{{}:{}}}", - inner_keys - .iter() - .map(|k| format!("\"{k}\"")) - .collect::>() - .join(format!(":{innermost_list},").as_str()), - innermost_list - ); - let json = format!( - "{{{}:{}}}", - keys.iter() - .map(|k| format!("\"{k}\"")) - .collect::>() - .join(format!(":{inner_object},").as_str()), - inner_object - ); - // Manually verify raw JSON value size - let mut variant_builder = VariantBuilder::new(); - json_to_variant(&json, &mut variant_builder)?; - let (metadata, value) = variant_builder.finish(); - let v = parquet_variant::Variant::try_new(&metadata, &value)?; - let output_string = variant_to_json_string(&v)?; - assert_eq!(output_string, json); - // Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496 - assert_eq!(metadata.len(), 2485); - // Verify value size. 
- // Size of innermost_list: 1 + 1 + 258 + 256 = 516 - // Size of inner object: 1 + 4 + 256 + 257 * 3 + 256 * 516 = 133128 - // Size of json: 1 + 4 + 512 + 1028 + 256 * 133128 = 34082313 - assert_eq!(value.len(), 34082313); - - let mut variant_builder = VariantBuilder::new(); - let mut object_builder = variant_builder.new_object(); - keys.iter().for_each(|key| { - let mut inner_object_builder = object_builder.new_object(key); - inner_keys.iter().for_each(|inner_key| { - let mut list_builder = inner_object_builder.new_list(inner_key); - for i in 0..=127 { - list_builder.append_value(Variant::Int8(i)); - } - list_builder.finish(); - }); - inner_object_builder.finish().unwrap(); - }); - object_builder.finish().unwrap(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - - JsonToVariantTest { - json: &json, - expected: variant, - } - .run() -} - -#[test] -fn test_json_to_variant_unicode() -> Result<(), ArrowError> { - let json = "{\"爱\":\"अ\",\"a\":1}"; - let mut variant_builder = VariantBuilder::new(); - json_to_variant(json, &mut variant_builder)?; - let (metadata, value) = variant_builder.finish(); - let v = parquet_variant::Variant::try_new(&metadata, &value)?; - let output_string = variant_to_json_string(&v)?; - assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}"); - let mut variant_builder = VariantBuilder::new(); - let mut object_builder = variant_builder.new_object(); - object_builder.insert("a", Variant::Int8(1)); - object_builder.insert("爱", Variant::ShortString(ShortString::try_new("अ")?)); - object_builder.finish().unwrap(); - let (metadata, value) = variant_builder.finish(); - let variant = Variant::try_new(&metadata, &value)?; - - assert_eq!( - value, - &[2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8] - ); - assert_eq!( - metadata, - &[1u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8] - ); - JsonToVariantTest { - json, - expected: variant, - } - .run() -} From 
d4910936e2b41e33823b8a0a98754912184717fe Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Jul 2025 15:13:23 -0400 Subject: [PATCH 4/4] remove unused --- parquet-variant-json/Cargo.toml | 1 - parquet-variant/Cargo.toml | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/parquet-variant-json/Cargo.toml b/parquet-variant-json/Cargo.toml index d8e1ef7cde3d..830a3c060011 100644 --- a/parquet-variant-json/Cargo.toml +++ b/parquet-variant-json/Cargo.toml @@ -39,7 +39,6 @@ parquet-variant = { path = "../parquet-variant" } chrono = { workspace = true } serde_json = "1.0" base64 = "0.22" -indexmap = "2.10.0" [lib] diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index 708b614cf4b7..3edfbb76ed32 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -29,14 +29,12 @@ keywords = ["arrow", "parquet", "variant"] readme = "README.md" edition = { workspace = true } # needs a newer version than workspace due to -# rror: `Option::<T>::unwrap` is not yet stable as a const fn +# Error: `Option::<T>::unwrap` is not yet stable as a const fn rust-version = "1.83" [dependencies] arrow-schema = { workspace = true } chrono = { workspace = true } -serde_json = "1.0" -base64 = "0.22" indexmap = "2.10.0"