From 85669e44870907ef443fd9e13ad03be36fd0729b Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Thu, 12 Feb 2026 17:38:00 +0100 Subject: [PATCH 01/11] Add pad_to --- src/lib/data/json/data.rs | 560 +++++++++++++++++++++++++++++++++ src/lib/data/json/macros.rs | 12 + src/lib/data/json/structure.rs | 386 +++++++++++++++++++++++ src/lib/data/long.rs | 394 ++++++++++++++++++++++- src/lib/data/py/json.rs | 57 ++++ src/lib/data/py/long.rs | 43 +++ src/lib/data/py/records.rs | 78 ++++- src/lib/data/records.rs | 78 +++++ src/lib/data/wasm/json.rs | 64 ++++ src/lib/data/wasm/long.rs | 49 +++ src/lib/data/wasm/records.rs | 76 ++++- 11 files changed, 1786 insertions(+), 11 deletions(-) diff --git a/src/lib/data/json/data.rs b/src/lib/data/json/data.rs index 1bfd056..485256f 100644 --- a/src/lib/data/json/data.rs +++ b/src/lib/data/json/data.rs @@ -44,6 +44,15 @@ pub enum JsonError { #[error("failed to parse string: {0}")] StringPadding(String), + + #[error("structure mismatch: expected {expected:?}, got {got:?}")] + StructureMismatch { + expected: super::structure::JSONStructure, + got: super::structure::JSONStructure, + }, + + #[error("cannot normalize: current size {current} exceeds target size {target}")] + SizeExceedsTarget { current: usize, target: usize }, } /// A JSON value where primitive types are stored as unencrypted PEP types. /// @@ -221,6 +230,229 @@ impl PEPJSONValue { } } } + + /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// `LongString` and `LongPseudonym` variants to ensure all instances have the same + /// number of blocks when encrypted. This is necessary for batch transcryption where + /// all values must have identical structure. + /// + /// The padding uses full PKCS#7 padding blocks which are automatically detected and + /// stripped during decoding, ensuring the original values are perfectly preserved. + /// + /// # Parameters + /// + /// - `structure`: The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// Returns a padded `PEPJSONValue` with padding blocks added where necessary. 
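+    ///
+    /// In a typical batch flow the target structure is not chosen by hand but computed
+    /// with `unify_structures` over the whole batch. A minimal sketch (the values and
+    /// names here are illustrative only):
+    ///
+    /// ```no_run
+    /// use libpep::data::json::data::PEPJSONValue;
+    /// use libpep::data::json::structure::unify_structures;
+    /// use serde_json::json;
+    ///
+    /// let values = vec![
+    ///     PEPJSONValue::from_value(&json!("hi")),
+    ///     PEPJSONValue::from_value(&json!("a considerably longer string")),
+    /// ];
+    ///
+    /// // Collect the per-value shapes and unify them into one shared target.
+    /// let structures: Vec<_> = values.iter().map(|v| v.structure()).collect();
+    /// let target = unify_structures(&structures).unwrap();
+    ///
+    /// // Pad every value to the shared target so the whole batch has a uniform structure.
+    /// let padded: Vec<_> = values
+    ///     .iter()
+    ///     .map(|v| v.pad_to(&target).unwrap())
+    ///     .collect();
+    /// ```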
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The current structure doesn't match the target structure type + /// - The current size exceeds the target size (cannot pad by removing blocks) + /// + /// # Example + /// + /// ```no_run + /// use libpep::data::json::data::PEPJSONValue; + /// use libpep::data::json::structure::JSONStructure; + /// use serde_json::json; + /// + /// let value1 = PEPJSONValue::from_value(&json!("hi")); + /// let value2 = PEPJSONValue::from_value(&json!("hello world")); + /// + /// // value2 has more blocks than value1 + /// // Pad value1 to match value2's structure + /// let target = JSONStructure::String(2); + /// let padded = value1.pad_to(&target).unwrap(); + /// ``` + pub fn pad_to(&self, structure: &super::structure::JSONStructure) -> Result { + use super::structure::JSONStructure; + + match (self, structure) { + (Self::Null, JSONStructure::Null) => Ok(Self::Null), + (Self::Bool(attr), JSONStructure::Bool) => Ok(Self::Bool(*attr)), + (Self::Number(attr), JSONStructure::Number) => Ok(Self::Number(*attr)), + + // Short string (1 block) + (Self::String(attr), JSONStructure::String(1)) => Ok(Self::String(*attr)), + + // Short string needs to be expanded to long string + #[cfg(feature = "long")] + (Self::String(attr), JSONStructure::String(target_blocks)) if *target_blocks > 1 => { + // Convert to LongAttribute with 1 block, then pad + let long_attr = LongAttribute::from(vec![*attr]); + let padded = long_attr.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_attr.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongString(padded)) + } + + // Long string normalization + #[cfg(feature = "long")] + (Self::LongString(long_attr), JSONStructure::String(target_blocks)) => { + let padded = long_attr.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_attr.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongString(padded)) + } + + // Short pseudonym (1 block) + (Self::Pseudonym(pseudo), JSONStructure::Pseudonym(1)) => Ok(Self::Pseudonym(*pseudo)), + + // Short pseudonym needs to be expanded to long pseudonym + #[cfg(feature = "long")] + (Self::Pseudonym(pseudo), JSONStructure::Pseudonym(target_blocks)) + if *target_blocks > 1 => + { + // Convert to LongPseudonym with 1 block, then pad + let long_pseudo = LongPseudonym::from(vec![*pseudo]); + let padded = long_pseudo.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_pseudo.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongPseudonym(padded)) + } + + // Long pseudonym normalization + #[cfg(feature = "long")] + (Self::LongPseudonym(long_pseudo), JSONStructure::Pseudonym(target_blocks)) => { + let padded = long_pseudo.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_pseudo.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongPseudonym(padded)) + } + + // Array padding - recursively pad each element + (Self::Array(arr), JSONStructure::Array(target_structures)) => { + if arr.len() != target_structures.len() 
{ + return Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }); + } + + let padded: Result, _> = arr + .iter() + .zip(target_structures.iter()) + .map(|(value, target)| value.pad_to(target)) + .collect(); + + Ok(Self::Array(padded?)) + } + + // Object padding - recursively pad each field + (Self::Object(obj), JSONStructure::Object(target_fields)) => { + let mut padded = HashMap::new(); + + for (key, target_struct) in target_fields { + match obj.get(key) { + Some(value) => { + padded.insert(key.clone(), value.pad_to(target_struct)?); + } + None => { + return Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }); + } + } + } + + // Check for extra fields in the object + if obj.len() != target_fields.len() { + return Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }); + } + + Ok(Self::Object(padded)) + } + + // Mismatched structure types + _ => Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }), + } + } + + /// Get the structure/shape of this PEPJSONValue. + /// + /// This returns a structure descriptor that captures the type and block count + /// of each field, without including the actual data values. + /// + /// # Example + /// + /// ```no_run + /// use libpep::data::json::data::PEPJSONValue; + /// use libpep::data::json::structure::JSONStructure; + /// use serde_json::json; + /// + /// let value = PEPJSONValue::from_value(&json!({ + /// "name": "Alice", + /// "age": 30 + /// })); + /// + /// let structure = value.structure(); + /// // structure describes the shape: Object with String(1) and Number fields + /// ``` + pub fn structure(&self) -> super::structure::JSONStructure { + use super::structure::JSONStructure; + + match self { + Self::Null => JSONStructure::Null, + Self::Bool(_) => JSONStructure::Bool, + Self::Number(_) => JSONStructure::Number, + Self::String(_) => JSONStructure::String(1), + #[cfg(feature = "long")] + Self::LongString(long_attr) => JSONStructure::String(long_attr.len()), + Self::Pseudonym(_) => JSONStructure::Pseudonym(1), + #[cfg(feature = "long")] + Self::LongPseudonym(long_pseudo) => JSONStructure::Pseudonym(long_pseudo.len()), + Self::Array(arr) => JSONStructure::Array(arr.iter().map(|v| v.structure()).collect()), + Self::Object(obj) => { + let mut fields: Vec<_> = obj + .iter() + .map(|(k, v)| (k.clone(), v.structure())) + .collect(); + fields.sort_by(|a, b| a.0.cmp(&b.0)); + JSONStructure::Object(fields) + } + } + } } impl Encryptable for PEPJSONValue { @@ -948,4 +1180,332 @@ mod tests { let decrypted2 = decrypt(&encrypted2, &keys); assert_eq!(decrypted1, decrypted2); } + + #[test] + #[cfg(feature = "long")] + fn normalize_short_string_to_long() { + use super::super::structure::JSONStructure; + + // Short string (1 block) + let short_value = PEPJSONValue::from_value(&json!("hi")); + assert_eq!(short_value.structure(), JSONStructure::String(1)); + + // Normalize to 3 blocks + let normalized = short_value.pad_to(&JSONStructure::String(3)).unwrap(); + assert_eq!(normalized.structure(), JSONStructure::String(3)); + + // Verify it's now a LongString + match normalized { + PEPJSONValue::LongString(ref long_attr) => { + assert_eq!(long_attr.len(), 3); + } + _ => panic!("Expected LongString after normalization"), + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_long_string_adds_padding() { + use super::super::structure::JSONStructure; + + // Long string (2 blocks) + let long_value = 
PEPJSONValue::from_value(&json!("This is a longer string")); + let initial_structure = long_value.structure(); + + // Get current block count + let current_blocks = match initial_structure { + JSONStructure::String(n) => n, + _ => panic!("Expected String structure"), + }; + + // Normalize to more blocks + let target_blocks = current_blocks + 2; + let normalized = long_value + .pad_to(&JSONStructure::String(target_blocks)) + .unwrap(); + assert_eq!(normalized.structure(), JSONStructure::String(target_blocks)); + + // Verify block count increased + match normalized { + PEPJSONValue::LongString(ref long_attr) => { + assert_eq!(long_attr.len(), target_blocks); + } + _ => panic!("Expected LongString"), + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_strings_different_sizes_encrypt_decrypt() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create strings of different sizes + let short = PEPJSONValue::from_value(&json!("hi")); + let medium = PEPJSONValue::from_value(&json!("hello world")); + let long = + PEPJSONValue::from_value(&json!("This is a much longer string with more content")); + + // Find the maximum block count + let max_blocks = [&short, &medium, &long] + .iter() + .map(|v| match v.structure() { + super::super::structure::JSONStructure::String(n) => n, + _ => 0, + }) + .max() + .unwrap(); + + // Normalize all to the same structure + let target = super::super::structure::JSONStructure::String(max_blocks); + let short_normalized = short.pad_to(&target).unwrap(); + let medium_normalized = medium.pad_to(&target).unwrap(); + let long_normalized = long.pad_to(&target).unwrap(); + + // All should have the same structure now + assert_eq!(short_normalized.structure(), target); + assert_eq!(medium_normalized.structure(), target); + assert_eq!(long_normalized.structure(), target); + + // Encrypt all values + let short_encrypted = encrypt(&short_normalized, &keys, &mut rng); + let medium_encrypted = encrypt(&medium_normalized, &keys, &mut rng); + let long_encrypted = encrypt(&long_normalized, &keys, &mut rng); + + // All encrypted values should have the same structure + assert_eq!(short_encrypted.structure(), medium_encrypted.structure()); + assert_eq!(medium_encrypted.structure(), long_encrypted.structure()); + + // Decrypt and verify original values are preserved + #[cfg(feature = "elgamal3")] + { + let short_decrypted = decrypt(&short_encrypted, &keys).unwrap(); + let medium_decrypted = decrypt(&medium_encrypted, &keys).unwrap(); + let long_decrypted = decrypt(&long_encrypted, &keys).unwrap(); + + assert_eq!(json!("hi"), short_decrypted.to_value().unwrap()); + assert_eq!(json!("hello world"), medium_decrypted.to_value().unwrap()); + assert_eq!( + json!("This is a much longer string with more content"), + long_decrypted.to_value().unwrap() + ); + } + + #[cfg(not(feature = "elgamal3"))] + { + let short_decrypted = decrypt(&short_encrypted, &keys); + let medium_decrypted = decrypt(&medium_encrypted, &keys); + let long_decrypted = decrypt(&long_encrypted, &keys); + + assert_eq!(json!("hi"), short_decrypted.to_value().unwrap()); + assert_eq!(json!("hello world"), medium_decrypted.to_value().unwrap()); + assert_eq!( + json!("This is a much longer string with more content"), + long_decrypted.to_value().unwrap() + ); + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_pseudonyms_different_sizes() { + use super::super::structure::JSONStructure; + use crate::pep_json; + + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create pseudonyms of 
different sizes + let short_pseudo = pep_json!(pseudonym("user123")); + let long_pseudo = pep_json!(pseudonym("user@example.com.with.a.very.long.domain")); + + // Find the maximum block count + let max_blocks = [&short_pseudo, &long_pseudo] + .iter() + .map(|v| match v.structure() { + JSONStructure::Pseudonym(n) => n, + _ => 0, + }) + .max() + .unwrap(); + + // Normalize both to the same structure + let target = JSONStructure::Pseudonym(max_blocks); + let short_normalized = short_pseudo.pad_to(&target).unwrap(); + let long_normalized = long_pseudo.pad_to(&target).unwrap(); + + // Both should have the same structure now + assert_eq!(short_normalized.structure(), target); + assert_eq!(long_normalized.structure(), target); + + // Encrypt and verify structures match + let short_encrypted = encrypt(&short_normalized, &keys, &mut rng); + let long_encrypted = encrypt(&long_normalized, &keys, &mut rng); + + assert_eq!(short_encrypted.structure(), long_encrypted.structure()); + + // Decrypt and verify original values are preserved + #[cfg(feature = "elgamal3")] + { + let short_decrypted = decrypt(&short_encrypted, &keys).unwrap(); + let long_decrypted = decrypt(&long_encrypted, &keys).unwrap(); + + assert_eq!(json!("user123"), short_decrypted.to_value().unwrap()); + assert_eq!( + json!("user@example.com.with.a.very.long.domain"), + long_decrypted.to_value().unwrap() + ); + } + + #[cfg(not(feature = "elgamal3"))] + { + let short_decrypted = decrypt(&short_encrypted, &keys); + let long_decrypted = decrypt(&long_encrypted, &keys); + + assert_eq!(json!("user123"), short_decrypted.to_value().unwrap()); + assert_eq!( + json!("user@example.com.with.a.very.long.domain"), + long_decrypted.to_value().unwrap() + ); + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_nested_objects_different_string_sizes() { + use super::super::structure::JSONStructure; + + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create two objects with strings of different sizes + let obj1 = PEPJSONValue::from_value(&json!({ + "name": "Alice", + "email": "a@b.c" + })); + + let obj2 = PEPJSONValue::from_value(&json!({ + "name": "Bob", + "email": "bob.smith@example.com" + })); + + // Get structures + let struct1 = obj1.structure(); + let struct2 = obj2.structure(); + + // Use the public unify_structures function + let unified = super::super::structure::unify_structures(&[struct1, struct2]).unwrap(); + + // Normalize both objects + let obj1_normalized = obj1.pad_to(&unified).unwrap(); + let obj2_normalized = obj2.pad_to(&unified).unwrap(); + + // Both should have the same structure now + assert_eq!(obj1_normalized.structure(), obj2_normalized.structure()); + + // Encrypt both + let obj1_encrypted = encrypt(&obj1_normalized, &keys, &mut rng); + let obj2_encrypted = encrypt(&obj2_normalized, &keys, &mut rng); + + // Structures should match + assert_eq!(obj1_encrypted.structure(), obj2_encrypted.structure()); + + // Decrypt and verify original values + #[cfg(feature = "elgamal3")] + { + let obj1_decrypted = decrypt(&obj1_encrypted, &keys).unwrap(); + let obj2_decrypted = decrypt(&obj2_encrypted, &keys).unwrap(); + + assert_eq!( + json!({"name": "Alice", "email": "a@b.c"}), + obj1_decrypted.to_value().unwrap() + ); + assert_eq!( + json!({"name": "Bob", "email": "bob.smith@example.com"}), + obj2_decrypted.to_value().unwrap() + ); + } + + #[cfg(not(feature = "elgamal3"))] + { + let obj1_decrypted = decrypt(&obj1_encrypted, &keys); + let obj2_decrypted = decrypt(&obj2_encrypted, &keys); + + assert_eq!( + 
json!({"name": "Alice", "email": "a@b.c"}), + obj1_decrypted.to_value().unwrap() + ); + assert_eq!( + json!({"name": "Bob", "email": "bob.smith@example.com"}), + obj2_decrypted.to_value().unwrap() + ); + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_errors_when_size_exceeds_target() { + use super::super::structure::JSONStructure; + + // Create a long string (multiple blocks) + let long_value = PEPJSONValue::from_value(&json!( + "This is a very long string that will take multiple blocks" + )); + + let current_blocks = match long_value.structure() { + JSONStructure::String(n) => n, + _ => panic!("Expected String structure"), + }; + + // Try to normalize to fewer blocks - should fail + let result = long_value.pad_to(&JSONStructure::String(current_blocks - 1)); + assert!(result.is_err()); + + match result { + Err(JsonError::SizeExceedsTarget { current, target }) => { + assert_eq!(current, current_blocks); + assert_eq!(target, current_blocks - 1); + } + _ => panic!("Expected SizeExceedsTarget error"), + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_errors_on_structure_mismatch() { + use super::super::structure::JSONStructure; + + // Create a string value + let string_value = PEPJSONValue::from_value(&json!("hello")); + + // Try to normalize to a number structure - should fail + let result = string_value.pad_to(&JSONStructure::Number); + assert!(result.is_err()); + + match result { + Err(JsonError::StructureMismatch { expected, got }) => { + assert_eq!(expected, JSONStructure::Number); + assert_eq!(got, JSONStructure::String(1)); + } + _ => panic!("Expected StructureMismatch error"), + } + } + + #[test] + fn normalize_preserves_primitives() { + use super::super::structure::JSONStructure; + + // Test that null, bool, and number normalization works + let null_value = PEPJSONValue::from_value(&json!(null)); + let bool_value = PEPJSONValue::from_value(&json!(true)); + let number_value = PEPJSONValue::from_value(&json!(42)); + + let null_normalized = null_value.pad_to(&JSONStructure::Null).unwrap(); + let bool_normalized = bool_value.pad_to(&JSONStructure::Bool).unwrap(); + let number_normalized = number_value.pad_to(&JSONStructure::Number).unwrap(); + + assert_eq!(null_normalized, null_value); + assert_eq!(bool_normalized, bool_value); + assert_eq!(number_normalized, number_value); + } } diff --git a/src/lib/data/json/macros.rs b/src/lib/data/json/macros.rs index a2e30df..805a1b4 100644 --- a/src/lib/data/json/macros.rs +++ b/src/lib/data/json/macros.rs @@ -30,6 +30,18 @@ /// ``` #[macro_export] macro_rules! pep_json { + // Entry point for standalone pseudonym + (pseudonym($value:expr)) => {{ + let s = $value; + // Always try short first, then fall back to long if needed + match $crate::data::simple::Pseudonym::from_string_padded(s) { + Ok(pseudo) => $crate::data::json::data::PEPJSONValue::Pseudonym(pseudo), + Err(_) => $crate::data::json::data::PEPJSONValue::LongPseudonym( + $crate::data::long::LongPseudonym::from_string_padded(s) + ) + } + }}; + // Entry point for object ({ $($tt:tt)* }) => {{ let builder = $crate::data::json::builder::PEPJSONBuilder::new(); diff --git a/src/lib/data/json/structure.rs b/src/lib/data/json/structure.rs index 5010bfe..6149dce 100644 --- a/src/lib/data/json/structure.rs +++ b/src/lib/data/json/structure.rs @@ -4,6 +4,24 @@ use super::data::EncryptedPEPJSONValue; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use thiserror::Error; + +/// Errors that can occur when unifying structures. 
+#[derive(Debug, Error, Clone, PartialEq, Eq)] +pub enum UnifyError { + #[error("Cannot unify empty list of structures")] + EmptyList, + + #[error("Incompatible structure types: {0:?} and {1:?}")] + IncompatibleTypes(JSONStructure, JSONStructure), + + #[error("Arrays have different lengths: {0} and {1}")] + ArrayLengthMismatch(usize, usize), + + #[error("Objects have different fields")] + ObjectFieldMismatch, +} /// Structure descriptor that describes the shape of an EncryptedPEPJSONValue without its actual encrypted data. /// @@ -23,6 +41,151 @@ pub enum JSONStructure { Object(Vec<(String, JSONStructure)>), } +/// Unifies multiple JSON structures by taking the maximum block count for each field. +/// +/// This function is useful for batch operations where you need to normalize multiple +/// values to have the same structure. It recursively unifies nested structures, +/// taking the maximum block count for strings and pseudonyms, and ensuring that +/// arrays and objects have compatible structures. +/// +/// # Parameters +/// +/// - `structures`: A slice of JSON structures to unify +/// +/// # Returns +/// +/// Returns a unified `JSONStructure` where: +/// - For `String` and `Pseudonym`: the maximum block count across all inputs +/// - For `Array`: recursively unified element structures (all arrays must have same length) +/// - For `Object`: recursively unified field structures (all objects must have same fields) +/// - For primitives (`Null`, `Bool`, `Number`): the same type (all must match) +/// +/// # Errors +/// +/// Returns an error if: +/// - The input list is empty +/// - Structures have incompatible types (e.g., mixing `String` with `Number`) +/// - Arrays have different lengths +/// - Objects have different sets of fields +/// +/// # Example +/// +/// ```no_run +/// use libpep::data::json::structure::{JSONStructure, unify_structures}; +/// +/// let struct1 = JSONStructure::String(1); // "hi" +/// let struct2 = JSONStructure::String(2); // "hello" +/// let struct3 = JSONStructure::String(3); // "hello world" +/// +/// let unified = unify_structures(&[struct1, struct2, struct3]).unwrap(); +/// assert_eq!(unified, JSONStructure::String(3)); // Maximum block count +/// ``` +/// +/// ## Object Example +/// +/// ```no_run +/// use libpep::data::json::structure::{JSONStructure, unify_structures}; +/// +/// let obj1 = JSONStructure::Object(vec![ +/// ("name".to_string(), JSONStructure::String(1)), +/// ("email".to_string(), JSONStructure::String(1)), +/// ]); +/// +/// let obj2 = JSONStructure::Object(vec![ +/// ("name".to_string(), JSONStructure::String(1)), +/// ("email".to_string(), JSONStructure::String(3)), +/// ]); +/// +/// let unified = unify_structures(&[obj1, obj2]).unwrap(); +/// // Result: email field has 3 blocks (max of 1 and 3) +/// ``` +pub fn unify_structures(structures: &[JSONStructure]) -> Result { + if structures.is_empty() { + return Err(UnifyError::EmptyList); + } + + if structures.len() == 1 { + return Ok(structures[0].clone()); + } + + // Start with the first structure and unify with all others + let mut unified = structures[0].clone(); + for structure in &structures[1..] { + unified = unify_two_structures(&unified, structure)?; + } + + Ok(unified) +} + +/// Helper function to unify two structures. 
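+///
+/// For example, `String(1)` and `String(3)` unify to `String(3)`, while `String(1)` and
+/// `Number` fail with `UnifyError::IncompatibleTypes`.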
+fn unify_two_structures( + s1: &JSONStructure, + s2: &JSONStructure, +) -> Result { + match (s1, s2) { + // Primitives must match exactly + (JSONStructure::Null, JSONStructure::Null) => Ok(JSONStructure::Null), + (JSONStructure::Bool, JSONStructure::Bool) => Ok(JSONStructure::Bool), + (JSONStructure::Number, JSONStructure::Number) => Ok(JSONStructure::Number), + + // Strings: take maximum block count + (JSONStructure::String(n1), JSONStructure::String(n2)) => { + Ok(JSONStructure::String(*n1.max(n2))) + } + + // Pseudonyms: take maximum block count + (JSONStructure::Pseudonym(n1), JSONStructure::Pseudonym(n2)) => { + Ok(JSONStructure::Pseudonym(*n1.max(n2))) + } + + // Arrays: must have same length, unify element-wise + (JSONStructure::Array(arr1), JSONStructure::Array(arr2)) => { + if arr1.len() != arr2.len() { + return Err(UnifyError::ArrayLengthMismatch(arr1.len(), arr2.len())); + } + + let unified_elements: Result, _> = arr1 + .iter() + .zip(arr2.iter()) + .map(|(e1, e2)| unify_two_structures(e1, e2)) + .collect(); + + Ok(JSONStructure::Array(unified_elements?)) + } + + // Objects: must have same fields, unify field-wise + (JSONStructure::Object(fields1), JSONStructure::Object(fields2)) => { + // Convert to HashMaps for easier lookup + let map1: HashMap<_, _> = fields1.iter().map(|(k, v)| (k, v)).collect(); + let map2: HashMap<_, _> = fields2.iter().map(|(k, v)| (k, v)).collect(); + + // Check that both objects have the same set of keys + if map1.len() != map2.len() { + return Err(UnifyError::ObjectFieldMismatch); + } + + let mut unified_fields = Vec::new(); + for (key, val1) in &map1 { + match map2.get(key) { + Some(val2) => { + let unified_val = unify_two_structures(val1, val2)?; + unified_fields.push(((*key).clone(), unified_val)); + } + None => return Err(UnifyError::ObjectFieldMismatch), + } + } + + // Sort fields to ensure consistent ordering + unified_fields.sort_by(|a, b| a.0.cmp(&b.0)); + + Ok(JSONStructure::Object(unified_fields)) + } + + // Incompatible types + _ => Err(UnifyError::IncompatibleTypes(s1.clone(), s2.clone())), + } +} + /// Methods for extracting structure from EncryptedPEPJSONValue impl EncryptedPEPJSONValue { /// Get the structure/shape of this EncryptedPEPJSONValue @@ -260,4 +423,227 @@ mod tests { let deserialized: JSONStructure = serde_json::from_str(&json_str).unwrap(); assert_eq!(structure, deserialized); } + + #[test] + fn unify_strings_different_sizes() { + let s1 = JSONStructure::String(1); + let s2 = JSONStructure::String(2); + let s3 = JSONStructure::String(3); + + let unified = unify_structures(&[s1, s2, s3]).unwrap(); + assert_eq!(unified, JSONStructure::String(3)); + } + + #[test] + fn unify_pseudonyms_different_sizes() { + let p1 = JSONStructure::Pseudonym(1); + let p2 = JSONStructure::Pseudonym(4); + let p3 = JSONStructure::Pseudonym(2); + + let unified = unify_structures(&[p1, p2, p3]).unwrap(); + assert_eq!(unified, JSONStructure::Pseudonym(4)); + } + + #[test] + fn unify_primitives() { + let null_structures = vec![JSONStructure::Null, JSONStructure::Null]; + assert_eq!( + unify_structures(&null_structures).unwrap(), + JSONStructure::Null + ); + + let bool_structures = vec![JSONStructure::Bool, JSONStructure::Bool]; + assert_eq!( + unify_structures(&bool_structures).unwrap(), + JSONStructure::Bool + ); + + let num_structures = vec![ + JSONStructure::Number, + JSONStructure::Number, + JSONStructure::Number, + ]; + assert_eq!( + unify_structures(&num_structures).unwrap(), + JSONStructure::Number + ); + } + + #[test] + fn unify_arrays() { + let 
arr1 = JSONStructure::Array(vec![JSONStructure::String(1), JSONStructure::Number]); + + let arr2 = JSONStructure::Array(vec![JSONStructure::String(3), JSONStructure::Number]); + + let unified = unify_structures(&[arr1, arr2]).unwrap(); + assert_eq!( + unified, + JSONStructure::Array(vec![JSONStructure::String(3), JSONStructure::Number,]) + ); + } + + #[test] + fn unify_objects() { + let obj1 = JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(1)), + ("email".to_string(), JSONStructure::String(1)), + ]); + + let obj2 = JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(2)), + ("email".to_string(), JSONStructure::String(3)), + ]); + + let unified = unify_structures(&[obj1, obj2]).unwrap(); + + // Check that the unified structure has max block counts + let expected = JSONStructure::Object(vec![ + ("email".to_string(), JSONStructure::String(3)), + ("name".to_string(), JSONStructure::String(2)), + ]); + + assert_eq!(unified, expected); + } + + #[test] + fn unify_nested_objects() { + let obj1 = JSONStructure::Object(vec![ + ( + "user".to_string(), + JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(1)), + ("id".to_string(), JSONStructure::Pseudonym(1)), + ]), + ), + ("count".to_string(), JSONStructure::Number), + ]); + + let obj2 = JSONStructure::Object(vec![ + ( + "user".to_string(), + JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(3)), + ("id".to_string(), JSONStructure::Pseudonym(2)), + ]), + ), + ("count".to_string(), JSONStructure::Number), + ]); + + let unified = unify_structures(&[obj1, obj2]).unwrap(); + + let expected = JSONStructure::Object(vec![ + ("count".to_string(), JSONStructure::Number), + ( + "user".to_string(), + JSONStructure::Object(vec![ + ("id".to_string(), JSONStructure::Pseudonym(2)), + ("name".to_string(), JSONStructure::String(3)), + ]), + ), + ]); + + assert_eq!(unified, expected); + } + + #[test] + fn unify_single_structure() { + let s = JSONStructure::String(5); + let unified = unify_structures(&[s.clone()]).unwrap(); + assert_eq!(unified, s); + } + + #[test] + fn unify_empty_list_fails() { + let result = unify_structures(&[]); + assert!(matches!(result, Err(UnifyError::EmptyList))); + } + + #[test] + fn unify_incompatible_types_fails() { + let s1 = JSONStructure::String(1); + let s2 = JSONStructure::Number; + + let result = unify_structures(&[s1, s2]); + assert!(matches!(result, Err(UnifyError::IncompatibleTypes(_, _)))); + } + + #[test] + fn unify_arrays_different_lengths_fails() { + let arr1 = JSONStructure::Array(vec![JSONStructure::Number, JSONStructure::Number]); + let arr2 = JSONStructure::Array(vec![JSONStructure::Number]); + + let result = unify_structures(&[arr1, arr2]); + assert!(matches!(result, Err(UnifyError::ArrayLengthMismatch(2, 1)))); + } + + #[test] + fn unify_objects_different_fields_fails() { + let obj1 = JSONStructure::Object(vec![("name".to_string(), JSONStructure::String(1))]); + + let obj2 = JSONStructure::Object(vec![("email".to_string(), JSONStructure::String(1))]); + + let result = unify_structures(&[obj1, obj2]); + assert!(matches!(result, Err(UnifyError::ObjectFieldMismatch))); + } + + #[test] + fn unify_real_world_example() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create three different user objects with varying string lengths + let user1 = PEPJSONValue::from_value(&json!({ + "name": "Alice", + "email": "a@b.c" + })); + + let user2 = PEPJSONValue::from_value(&json!({ + "name": "Bob", + "email": "bob@example.com" + 
})); + + let user3 = PEPJSONValue::from_value(&json!({ + "name": "Charlie Johnson", + "email": "charlie.johnson@verylongdomain.example.com" + })); + + // Encrypt them + let enc1 = encrypt(&user1, &keys, &mut rng); + let enc2 = encrypt(&user2, &keys, &mut rng); + let enc3 = encrypt(&user3, &keys, &mut rng); + + // Get their structures + let struct1 = enc1.structure(); + let struct2 = enc2.structure(); + let struct3 = enc3.structure(); + + // Unify the structures + let unified = unify_structures(&[struct1, struct2, struct3]).unwrap(); + + // The unified structure should have the maximum block count for each field + match unified { + JSONStructure::Object(fields) => { + // Find email and name fields + let email_struct = fields.iter().find(|(k, _)| k == "email").unwrap().1.clone(); + let name_struct = fields.iter().find(|(k, _)| k == "name").unwrap().1.clone(); + + // Email should have the max blocks from all three users + if let JSONStructure::String(email_blocks) = email_struct { + // user3's email is the longest + assert!(email_blocks >= 2); + } else { + panic!("Expected String structure for email"); + } + + // Name should have the max blocks from all three users + if let JSONStructure::String(name_blocks) = name_struct { + // user3's name is the longest + assert!(name_blocks >= 1); + } else { + panic!("Expected String structure for name"); + } + } + _ => panic!("Expected Object structure"), + } + } } diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index dca89e6..2f6e273 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -1,7 +1,32 @@ //! Long (multi-block) data types for pseudonyms and attributes. //! //! This module provides support for multi-block pseudonyms and attributes that can hold -//! more than 16 bytes of data. These types are built on top of PKCS#7 padding. +//! more than 16 bytes of data. +//! +//! # Two Types of Padding +//! +//! This module handles **two distinct types of padding**: +//! +//! ## 1. Internal Padding (PKCS#7) +//! +//! Standard PKCS#7 padding is applied **within** the last data block. This is handled +//! automatically during encoding/decoding: +//! - Ensures data fills complete 16-byte blocks +//! - The padding byte value indicates the number of padding bytes +//! - Example: "hello" (5 bytes) → `68 65 6C 6C 6F 0B 0B 0B 0B 0B 0B 0B 0B 0B 0B 0B` +//! +//! ## 2. External Padding (for Batch Unlinkability) +//! +//! External padding adds **full blocks** to ensure all values in a batch have identical +//! structure, which is required for unlinkable batch transcryption: +//! - Added using the `pad_to(n)` method +//! - Each padding block contains all bytes = 0x10 (for 16-byte blocks) +//! - Automatically detected and removed during decoding +//! - Example: A 1-block string padded to 3 blocks → [data block][0x10×16][0x10×16] +//! +//! **Why external padding?** In batch transcryption, all values must have identical structure +//! to prevent linkability attacks. External padding normalizes different-sized values to the +//! same structure without modifying their content. use crate::arithmetic::scalars::ScalarNonZero; use crate::data::simple::{ @@ -161,6 +186,81 @@ impl LongPseudonym { .collect::>() .join("") } + + /// Adds **external padding** to reach a target number of blocks for batch unlinkability. + /// + /// ## Purpose: Batch Transcryption Unlinkability + /// + /// In batch transcryption, all values **must have identical structure** to prevent + /// linkability attacks. 
This method adds full padding blocks (external padding) to + /// normalize different-sized pseudonyms to the same structure without modifying content. + /// + /// ## How it Works + /// + /// - Adds full blocks of `0x10` (external padding) after the data blocks + /// - These blocks are **separate from** the internal PKCS#7 padding within blocks + /// - External padding is automatically detected and removed during decoding + /// - The original pseudonym value is perfectly preserved + /// + /// ## Parameters + /// + /// - `target_blocks`: The desired number of blocks (must be >= current block count) + /// + /// ## Returns + /// + /// Returns a new `LongPseudonym` padded to the target number of blocks. + /// + /// ## Errors + /// + /// Returns an error if: + /// - The current number of blocks exceeds the target + /// + /// ## Example: Normalizing for Batch Processing + /// + /// ```no_run + /// use libpep::data::long::LongPseudonym; + /// + /// let short_pseudo = LongPseudonym::from_string_padded("user123"); // 1 block + /// let long_pseudo = LongPseudonym::from_string_padded("user@example.com"); // 2 blocks + /// + /// // Normalize both to 2 blocks for unlinkable batch transcryption + /// let short_padded = short_pseudo.pad_to(2).unwrap(); + /// let long_padded = long_pseudo.pad_to(2).unwrap(); + /// + /// // Both now have identical structure (2 blocks) + /// assert_eq!(short_padded.len(), 2); + /// assert_eq!(long_padded.len(), 2); + /// + /// // Original values are preserved when decoded + /// assert_eq!(short_padded.to_string_padded().unwrap(), "user123"); + /// assert_eq!(long_padded.to_string_padded().unwrap(), "user@example.com"); + /// ``` + pub fn pad_to(&self, target_blocks: usize) -> Result { + let current_blocks = self.0.len(); + + if current_blocks > target_blocks { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Cannot pad: current blocks ({}) exceeds target ({})", + current_blocks, target_blocks + ), + )); + } + + if current_blocks == target_blocks { + return Ok(self.clone()); + } + + // Create a full PKCS#7 padding block (all bytes = 0x10 for 16-byte blocks) + // Note: Pseudonym also uses 16-byte blocks like Attribute + let padding_block = Pseudonym::from_lizard(&[0x10; 16]); + + let mut blocks = self.0.clone(); + blocks.resize(target_blocks, padding_block); + + Ok(LongPseudonym(blocks)) + } } impl LongAttribute { @@ -238,6 +338,63 @@ impl LongAttribute { .collect::>() .join("") } + + /// Pads this `LongAttribute` to a target number of blocks. + /// + /// This is useful for batch operations where all attributes must have the same structure. + /// The padding blocks are full PKCS#7 padding blocks (all bytes = 0x10) which are + /// automatically detected and skipped during decoding. + /// + /// # Parameters + /// + /// - `target_blocks`: The desired number of blocks (must be >= current block count) + /// + /// # Returns + /// + /// Returns a new `LongAttribute` padded to the target number of blocks. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The current number of blocks exceeds the target + /// + /// # Example + /// + /// ```no_run + /// use libpep::data::long::LongAttribute; + /// + /// let attr = LongAttribute::from_string_padded("hello"); + /// // Pad to 3 blocks for batch processing + /// let padded = attr.pad_to(3).unwrap(); + /// assert_eq!(padded.len(), 3); + /// // Decoding still returns the original string + /// assert_eq!(padded.to_string_padded().unwrap(), "hello"); + /// ``` + pub fn pad_to(&self, target_blocks: usize) -> Result { + let current_blocks = self.0.len(); + + if current_blocks > target_blocks { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Cannot pad: current blocks ({}) exceeds target ({})", + current_blocks, target_blocks + ), + )); + } + + if current_blocks == target_blocks { + return Ok(self.clone()); + } + + // Create a full PKCS#7 padding block (all bytes = 0x10 for 16-byte blocks) + let padding_block = Attribute::from_lizard(&[0x10; 16]); + + let mut blocks = self.0.clone(); + blocks.resize(target_blocks, padding_block); + + Ok(LongAttribute(blocks)) + } } impl LongEncryptedPseudonym { @@ -877,7 +1034,45 @@ fn from_bytes_padded_impl(data: &[u8]) -> Vec { result } -/// Internal helper function to decode padded bytes +/// Helper function to check if a block is a full PKCS#7 padding block (external padding). +/// +/// A full padding block has all bytes equal to the block size (0x10 for 16-byte blocks, +/// 0x20 for 32-byte blocks). These are used by `pad_to()` for batch processing. +fn is_external_padding_block(block: &[u8]) -> bool { + if block.is_empty() { + return false; + } + + let expected_padding = block.len() as u8; + block.iter().all(|&b| b == expected_padding) +} + +/// Helper to check if a block has valid PKCS#7 padding. +fn has_valid_pkcs7_padding(block: &[u8]) -> bool { + if block.len() != 16 { + return false; + } + + let padding_byte = block[15]; + + // Padding must be between 1 and 16 + if padding_byte == 0 || padding_byte > 16 { + return false; + } + + // All padding bytes must have the same value + block[16 - padding_byte as usize..] + .iter() + .all(|&b| b == padding_byte) +} + +/// Internal helper function to decode padded bytes. +/// +/// This function automatically detects and stops at external padding blocks +/// created by `pad_to()`, ensuring that normalized values decode correctly. +/// +/// External padding blocks (all bytes = 0x10) are distinguished from legitimate +/// full PKCS#7 padding blocks by checking if the previous block has valid padding. 
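+///
+/// For example, `"hi"` padded to three blocks is `[ "hi" + 14×0x0E ][ 0x10×16 ][ 0x10×16 ]`:
+/// the scan starts at the last all-0x10 block, sees that the block before it is itself
+/// validly PKCS#7-padded, and therefore treats it as external padding; the same holds one
+/// block earlier, until the scan reaches the `"hi"` block, which is not all-0x10 and
+/// becomes the last data block. By contrast, for a full 16-byte value followed by a single
+/// all-0x10 block, the data block carries no valid trailing PKCS#7 padding (assuming its
+/// bytes do not coincidentally form one), so the all-0x10 block is kept as the legitimate
+/// final padding block.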
fn to_bytes_padded_impl(items: &[T]) -> Result, Error> { if items.is_empty() { return Err(Error::new( @@ -886,10 +1081,51 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E )); } - let mut result = Vec::with_capacity(items.len() * 16); + // Find the last non-external-padding block + let mut last_data_block_idx = items.len() - 1; - // Copy all blocks except the last one - for item in items.iter().take(items.len() - 1) { + for i in (0..items.len()).rev() { + let block = items[i].to_lizard().ok_or(Error::new( + ErrorKind::InvalidData, + "Encryptable conversion to bytes failed", + ))?; + + // Check if this looks like an external padding block + if is_external_padding_block(&block) { + // It looks like external padding, but we need to check if it's actually + // legitimate PKCS#7 padding for the previous block + if i > 0 { + let prev_block = items[i - 1].to_lizard().ok_or(Error::new( + ErrorKind::InvalidData, + "Encryptable conversion to bytes failed", + ))?; + + // If the previous block doesn't have valid PKCS#7 padding, + // it means it's a full data block and this "external padding" + // is actually legitimate PKCS#7 padding + if !has_valid_pkcs7_padding(&prev_block) { + // This is legitimate PKCS#7 padding, not external padding + last_data_block_idx = i; + break; + } + // Otherwise, this is external padding, continue scanning backwards + } else { + // First block looks like external padding - it must be legitimate + // (e.g., empty string encoded) + last_data_block_idx = 0; + break; + } + } else { + // Found a non-external-padding block + last_data_block_idx = i; + break; + } + } + + let mut result = Vec::with_capacity((last_data_block_idx + 1) * 16); + + // Copy all blocks except the last data block + for item in items.iter().take(last_data_block_idx) { let block = item.to_lizard().ok_or(Error::new( ErrorKind::InvalidData, "Encryptable conversion to bytes failed", @@ -897,10 +1133,8 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E result.extend_from_slice(&block); } - // Process the last block and validate padding - // Unwrap is safe: we already checked items.is_empty() above - #[allow(clippy::unwrap_used)] - let last_block = items.last().unwrap().to_lizard().ok_or(Error::new( + // Process the last data block and validate PKCS#7 padding + let last_block = items[last_data_block_idx].to_lizard().ok_or(Error::new( ErrorKind::InvalidData, "Last encryptable conversion to bytes failed", ))?; @@ -1236,4 +1470,146 @@ mod tests { assert_eq!(1, deserialized.len()); assert_eq!(long_encrypted[0], deserialized[0]); } + + #[test] + fn long_attribute_null_bytes_in_middle() { + // Test string with null bytes in the middle + let str_with_nulls = "hello\0world"; + let attr = LongAttribute::from_string_padded(str_with_nulls); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_attribute_null_bytes_at_end() { + // Test string ending with null bytes + let str_ending_nulls = "test\0\0"; + let attr = LongAttribute::from_string_padded(str_ending_nulls); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(str_ending_nulls, decoded); + } + + #[test] + fn long_attribute_empty_string() { + // Test empty string + let empty = ""; + let attr = LongAttribute::from_string_padded(empty); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(empty, decoded); + } + + #[test] + fn long_attribute_strings_ending_with_many_null_bytes() { + // Test various counts of trailing null bytes + for null_count in 1..=20 { + let mut test_str = 
String::from("test"); + test_str.push_str(&"\0".repeat(null_count)); + + let attr = LongAttribute::from_string_padded(&test_str); + let decoded = attr.to_string_padded().unwrap(); + + assert_eq!(test_str, decoded, "Failed for {} null bytes", null_count); + } + } + + #[test] + fn long_attribute_only_null_bytes() { + // Test strings that are only null bytes + for null_count in 1..=20 { + let test_str = "\0".repeat(null_count); + + let attr = LongAttribute::from_string_padded(&test_str); + let decoded = attr.to_string_padded().unwrap(); + + assert_eq!( + test_str, decoded, + "Failed for string of {} null bytes", + null_count + ); + } + } + + #[test] + fn long_attribute_edge_case_15_and_16_null_bytes() { + // 15 null bytes - exactly fits in one block with 1 byte padding + let str_15 = "\0".repeat(15); + let attr_15 = LongAttribute::from_string_padded(&str_15); + let decoded_15 = attr_15.to_string_padded().unwrap(); + assert_eq!(str_15, decoded_15); + + // 16 null bytes - requires 2 blocks (first full, second with 15 bytes data + 1 padding) + let str_16 = "\0".repeat(16); + let attr_16 = LongAttribute::from_string_padded(&str_16); + let decoded_16 = attr_16.to_string_padded().unwrap(); + assert_eq!(str_16, decoded_16); + + // 17 null bytes + let str_17 = "\0".repeat(17); + let attr_17 = LongAttribute::from_string_padded(&str_17); + let decoded_17 = attr_17.to_string_padded().unwrap(); + assert_eq!(str_17, decoded_17); + } + + #[test] + fn long_attribute_pad_to_with_null_bytes() { + // Create a string with null bytes + let str_with_nulls = "data\0\0end"; + let attr = LongAttribute::from_string_padded(str_with_nulls); + + // Pad to more blocks + let padded = attr.pad_to(3).unwrap(); + + // Should preserve the null bytes in the original string + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_attribute_pad_to_only_null_bytes() { + // Test strings that are only null bytes, then padded + for null_count in 1..=10 { + let test_str = "\0".repeat(null_count); + + let attr = LongAttribute::from_string_padded(&test_str); + let padded = attr.pad_to(5).unwrap(); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!( + test_str, decoded, + "Failed for padded string of {} null bytes", + null_count + ); + } + } + + #[test] + fn long_attribute_pad_to_empty_string() { + // Test empty string with padding + let empty = ""; + let attr = LongAttribute::from_string_padded(empty); + let padded = attr.pad_to(2).unwrap(); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(empty, decoded); + } + + #[test] + fn long_pseudonym_null_bytes_roundtrip() { + // Test pseudonym with null bytes + let str_with_nulls = "user\0\0id"; + let pseudo = LongPseudonym::from_string_padded(str_with_nulls); + let decoded = pseudo.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_pseudonym_pad_to_with_null_bytes() { + // Test pseudonym with null bytes after padding + let str_with_nulls = "id\0\0x"; + let pseudo = LongPseudonym::from_string_padded(str_with_nulls); + let padded = pseudo.pad_to(3).unwrap(); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } } diff --git a/src/lib/data/py/json.rs b/src/lib/data/py/json.rs index 0222016..d1e1f8c 100644 --- a/src/lib/data/py/json.rs +++ b/src/lib/data/py/json.rs @@ -64,6 +64,39 @@ impl PyPEPJSONValue { .map_err(|e| PyValueError::new_err(format!("Conversion failed: {}", e)))?; Python::attach(|py| json_to_python(py, 
&json_value)) } + + /// Get the structure/shape of this PEPJSONValue. + /// + /// Returns: + /// A JSONStructure describing the shape + #[pyo3(name = "structure")] + fn structure(&self) -> PyJSONStructure { + PyJSONStructure(self.0.structure()) + } + + /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// LongString and LongPseudonym variants to ensure all instances have the same + /// number of blocks when encrypted. This is necessary for batch transcryption where + /// all values must have identical structure. + /// + /// Args: + /// structure: The target structure specifying the number of blocks for each field + /// + /// Returns: + /// A padded PEPJSONValue with padding blocks added where necessary + /// + /// Raises: + /// ValueError: If the current structure doesn't match the target structure type + /// or if the current size exceeds the target size + #[pyo3(name = "pad_to")] + fn pad_to(&self, structure: &PyJSONStructure) -> PyResult { + self.0 + .pad_to(&structure.0) + .map(Self) + .map_err(|e| PyValueError::new_err(format!("Padding failed: {}", e))) + } } /// An encrypted PEP JSON value. @@ -455,6 +488,29 @@ pub fn py_bytes_to_number(bytes: [u8; 9]) -> f64 { num.as_f64().unwrap_or(0.0) } +/// Unifies multiple JSON structures by taking the maximum block count for each field. +/// +/// This function is useful for batch operations where you need to normalize multiple +/// values to have the same structure. It recursively unifies nested structures, +/// taking the maximum block count for strings and pseudonyms. +/// +/// Args: +/// structures: A list of JSONStructure objects to unify +/// +/// Returns: +/// A unified JSONStructure where string and pseudonym fields have maximum block counts +/// +/// Raises: +/// ValueError: If the structures are incompatible (different types, array lengths, or object fields) +#[pyfunction] +#[pyo3(name = "unify_structures")] +pub fn py_unify_structures(structures: Vec) -> PyResult { + let rust_structures: Vec = structures.into_iter().map(|s| s.0).collect(); + crate::data::json::structure::unify_structures(&rust_structures) + .map(PyJSONStructure) + .map_err(|e| PyValueError::new_err(format!("Unification failed: {}", e))) +} + pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { // Register main JSON types at json module level m.add_class::()?; @@ -480,6 +536,7 @@ pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(py_byte_to_bool, m)?)?; m.add_function(wrap_pyfunction!(py_number_to_bytes, m)?)?; m.add_function(wrap_pyfunction!(py_bytes_to_number, m)?)?; + m.add_function(wrap_pyfunction!(py_unify_structures, m)?)?; Ok(()) } diff --git a/src/lib/data/py/long.rs b/src/lib/data/py/long.rs index 181b040..49dadb1 100644 --- a/src/lib/data/py/long.rs +++ b/src/lib/data/py/long.rs @@ -69,6 +69,28 @@ impl PyLongPseudonym { Ok(PyBytes::new(py, &result).into()) } + /// Pads this LongPseudonym to a target number of blocks for batch unlinkability. + /// + /// In batch transcryption, all values must have identical structure to prevent + /// linkability attacks. This method adds external padding blocks to normalize + /// different-sized pseudonyms to the same structure. 
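+    /// The padding is removed automatically when decoding, so the original pseudonym
+    /// value is preserved.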
+ /// + /// Args: + /// target_blocks: The desired number of blocks (must be >= current block count) + /// + /// Returns: + /// A new LongPseudonym padded to the target number of blocks + /// + /// Raises: + /// ValueError: If the current number of blocks exceeds the target + #[pyo3(name = "pad_to")] + fn pad_to(&self, target_blocks: usize) -> PyResult { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Padding failed: {e}"))) + } + /// Get the underlying pseudonyms. #[pyo3(name = "pseudonyms")] fn pseudonyms(&self) -> Vec { @@ -139,6 +161,27 @@ impl PyLongAttribute { Ok(PyBytes::new(py, &result).into()) } + /// Pads this LongAttribute to a target number of blocks for batch operations. + /// + /// This is useful for batch operations where all attributes must have the same structure. + /// The padding blocks are automatically detected and skipped during decoding. + /// + /// Args: + /// target_blocks: The desired number of blocks (must be >= current block count) + /// + /// Returns: + /// A new LongAttribute padded to the target number of blocks + /// + /// Raises: + /// ValueError: If the current number of blocks exceeds the target + #[pyo3(name = "pad_to")] + fn pad_to(&self, target_blocks: usize) -> PyResult { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Padding failed: {e}"))) + } + /// Get the underlying attributes. #[pyo3(name = "attributes")] fn attributes(&self) -> Vec { diff --git a/src/lib/data/py/records.rs b/src/lib/data/py/records.rs index e6b3311..50af282 100644 --- a/src/lib/data/py/records.rs +++ b/src/lib/data/py/records.rs @@ -10,7 +10,7 @@ use crate::data::py::simple::{ }; use crate::data::records::{EncryptedRecord, Record}; #[cfg(feature = "long")] -use crate::data::records::{LongEncryptedRecord, LongRecord}; +use crate::data::records::{LongEncryptedRecord, LongRecord, LongRecordStructure}; use crate::keys::py::PySessionKeys; use crate::keys::types::SessionKeys; use pyo3::prelude::*; @@ -177,6 +177,39 @@ impl PyLongRecord { self.0.attributes.len() ) } + + /// Get the structure of this LongRecord. + /// + /// Returns: + /// A LongRecordStructure describing the number of blocks in each pseudonym and attribute + #[pyo3(name = "structure")] + fn structure(&self) -> PyLongRecordStructure { + PyLongRecordStructure(self.0.structure()) + } + + /// Pads this LongRecord to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// each pseudonym and attribute to ensure all records have the same structure. + /// This is necessary for batch transcryption where all values must have identical + /// structure to prevent linkability attacks. 
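+    /// Padding blocks are detected and removed automatically when decoding, so the
+    /// original values in the record are preserved.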
+ /// + /// Args: + /// structure: The target structure specifying the number of blocks for each field + /// + /// Returns: + /// A padded LongRecord with padding blocks added where necessary + /// + /// Raises: + /// ValueError: If the number of pseudonyms/attributes doesn't match the structure + /// or if any field exceeds its target size + #[pyo3(name = "pad_to")] + fn pad_to(&self, structure: &PyLongRecordStructure) -> PyResult { + self.0 + .pad_to(&structure.0) + .map(Self) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Padding failed: {e}"))) + } } #[cfg(feature = "long")] @@ -268,6 +301,48 @@ pub fn py_decrypt_long_record( PyLongRecord(decrypt(&encrypted.0, &keys)) } +#[cfg(feature = "long")] +/// Structure descriptor for LongRecords - describes the shape including block counts. +#[pyclass(name = "LongRecordStructure", from_py_object)] +#[derive(Clone)] +pub struct PyLongRecordStructure(pub(crate) LongRecordStructure); + +#[cfg(feature = "long")] +#[pymethods] +impl PyLongRecordStructure { + /// Create a new LongRecordStructure with block counts for pseudonyms and attributes. + /// + /// Args: + /// pseudonym_blocks: List of block counts for each pseudonym + /// attribute_blocks: List of block counts for each attribute + #[new] + pub fn new(pseudonym_blocks: Vec, attribute_blocks: Vec) -> Self { + PyLongRecordStructure(LongRecordStructure { + pseudonym_blocks, + attribute_blocks, + }) + } + + /// Get the block counts for pseudonyms. + #[getter] + pub fn pseudonym_blocks(&self) -> Vec { + self.0.pseudonym_blocks.clone() + } + + /// Get the block counts for attributes. + #[getter] + pub fn attribute_blocks(&self) -> Vec { + self.0.attribute_blocks.clone() + } + + fn __repr__(&self) -> String { + format!( + "LongRecordStructure(pseudonym_blocks={:?}, attribute_blocks={:?})", + self.0.pseudonym_blocks, self.0.attribute_blocks + ) + } +} + pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { // Register Record types m.add_class::()?; @@ -278,6 +353,7 @@ pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { { m.add_class::()?; m.add_class::()?; + m.add_class::()?; } Ok(()) diff --git a/src/lib/data/records.rs b/src/lib/data/records.rs index b4e5bbd..1a59a58 100644 --- a/src/lib/data/records.rs +++ b/src/lib/data/records.rs @@ -185,6 +185,84 @@ impl LongRecord { attributes, } } + + /// Pads this LongRecord to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// each pseudonym and attribute to ensure all records have the same structure. + /// This is necessary for batch transcryption where all values must have identical + /// structure to prevent linkability attacks. 
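+    ///
+    /// For example, given two records produced elsewhere, the structure of the larger
+    /// record can serve as the shared target (a minimal sketch; `record_a` and
+    /// `record_b` are placeholders, and `record_b` is assumed to be at least as large
+    /// in every field):
+    ///
+    /// ```no_run
+    /// # use libpep::data::records::LongRecord;
+    /// # fn pad_for_batch(record_a: &LongRecord, record_b: &LongRecord) {
+    /// // Reuse the shape of the larger record as the shared target structure.
+    /// let target = record_b.structure();
+    ///
+    /// // `padded` ends up with the same shape as `record_b`, so both records can be
+    /// // part of the same unlinkable batch.
+    /// let padded = record_a.pad_to(&target).unwrap();
+    /// # }
+    /// ```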
+ /// + /// # Arguments + /// + /// * `structure` - The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// A padded LongRecord with padding blocks added where necessary + /// + /// # Errors + /// + /// Returns an error if: + /// - The number of pseudonyms doesn't match the structure + /// - The number of attributes doesn't match the structure + /// - Any pseudonym or attribute exceeds its target size + pub fn pad_to(&self, structure: &LongRecordStructure) -> Result { + // Validate counts + if self.pseudonyms.len() != structure.pseudonym_blocks.len() { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Pseudonym count mismatch: record has {} but structure expects {}", + self.pseudonyms.len(), + structure.pseudonym_blocks.len() + ), + )); + } + + if self.attributes.len() != structure.attribute_blocks.len() { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Attribute count mismatch: record has {} but structure expects {}", + self.attributes.len(), + structure.attribute_blocks.len() + ), + )); + } + + // Pad pseudonyms + let padded_pseudonyms: Result, _> = self + .pseudonyms + .iter() + .zip(structure.pseudonym_blocks.iter()) + .map(|(p, &target_blocks)| p.pad_to(target_blocks)) + .collect(); + + // Pad attributes + let padded_attributes: Result, _> = self + .attributes + .iter() + .zip(structure.attribute_blocks.iter()) + .map(|(a, &target_blocks)| a.pad_to(target_blocks)) + .collect(); + + Ok(LongRecord { + pseudonyms: padded_pseudonyms?, + attributes: padded_attributes?, + }) + } + + /// Get the structure of this LongRecord. + /// + /// Returns a `LongRecordStructure` describing the number of blocks in each + /// pseudonym and attribute. + pub fn structure(&self) -> LongRecordStructure { + LongRecordStructure { + pseudonym_blocks: self.pseudonyms.iter().map(|p| p.0.len()).collect(), + attribute_blocks: self.attributes.iter().map(|a| a.0.len()).collect(), + } + } } #[cfg(feature = "long")] diff --git a/src/lib/data/wasm/json.rs b/src/lib/data/wasm/json.rs index 5579bd3..6bb39cb 100644 --- a/src/lib/data/wasm/json.rs +++ b/src/lib/data/wasm/json.rs @@ -66,6 +66,43 @@ impl WASMPEPJSONValue { serde_wasm_bindgen::to_value(&json_value) .map_err(|e| JsValue::from_str(&format!("Failed to convert to JS: {}", e))) } + + /// Get the structure/shape of this PEPJSONValue. + /// + /// # Returns + /// + /// A JSONStructure describing the shape + #[wasm_bindgen] + pub fn structure(&self) -> WASMJSONStructure { + WASMJSONStructure(self.0.structure()) + } + + /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// LongString and LongPseudonym variants to ensure all instances have the same + /// number of blocks when encrypted. This is necessary for batch transcryption where + /// all values must have identical structure. 
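+    /// Padding blocks are detected and stripped automatically during decoding, so the
+    /// original values are preserved.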
+ /// + /// # Arguments + /// + /// * `structure` - The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// A padded PEPJSONValue with padding blocks added where necessary + /// + /// # Errors + /// + /// Throws an error if the current structure doesn't match the target structure type + /// or if the current size exceeds the target size + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, structure: &WASMJSONStructure) -> Result { + self.0 + .pad_to(&structure.0) + .map(Self) + .map_err(|e| JsValue::from_str(&format!("Padding failed: {}", e))) + } } /// An encrypted PEP JSON value. @@ -415,3 +452,30 @@ pub fn wasm_bytes_to_number(bytes: Vec) -> Result { let num = utils::bytes_to_number(&arr); Ok(num.as_f64().unwrap_or(0.0)) } + +/// Unifies multiple JSON structures by taking the maximum block count for each field. +/// +/// This function is useful for batch operations where you need to normalize multiple +/// values to have the same structure. It recursively unifies nested structures, +/// taking the maximum block count for strings and pseudonyms. +/// +/// # Arguments +/// +/// * `structures` - An array of JSONStructure objects to unify +/// +/// # Returns +/// +/// A unified JSONStructure where string and pseudonym fields have maximum block counts +/// +/// # Errors +/// +/// Returns an error if the structures are incompatible (different types, array lengths, or object fields) +#[wasm_bindgen(js_name = unifyStructures)] +pub fn wasm_unify_structures( + structures: Vec, +) -> Result { + let rust_structures: Vec = structures.into_iter().map(|s| s.0).collect(); + crate::data::json::structure::unify_structures(&rust_structures) + .map(WASMJSONStructure) + .map_err(|e| JsValue::from_str(&format!("Unification failed: {}", e))) +} diff --git a/src/lib/data/wasm/long.rs b/src/lib/data/wasm/long.rs index 7da1f7d..a8ac901 100644 --- a/src/lib/data/wasm/long.rs +++ b/src/lib/data/wasm/long.rs @@ -50,6 +50,31 @@ impl WASMLongPseudonym { .map_err(|e| JsError::new(&format!("Decoding failed: {e}"))) } + /// Pads this LongPseudonym to a target number of blocks for batch unlinkability. + /// + /// In batch transcryption, all values must have identical structure to prevent + /// linkability attacks. This method adds external padding blocks to normalize + /// different-sized pseudonyms to the same structure. + /// + /// # Arguments + /// + /// * `targetBlocks` - The desired number of blocks (must be >= current block count) + /// + /// # Returns + /// + /// A new LongPseudonym padded to the target number of blocks + /// + /// # Errors + /// + /// Throws an error if the current number of blocks exceeds the target + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, target_blocks: usize) -> Result { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| JsError::new(&format!("Padding failed: {e}"))) + } + /// Get the underlying pseudonyms. #[wasm_bindgen(getter)] pub fn pseudonyms(&self) -> Vec { @@ -111,6 +136,30 @@ impl WASMLongAttribute { .map_err(|e| JsError::new(&format!("Decoding failed: {e}"))) } + /// Pads this LongAttribute to a target number of blocks for batch operations. + /// + /// This is useful for batch operations where all attributes must have the same structure. + /// The padding blocks are automatically detected and skipped during decoding. 
+ /// + /// # Arguments + /// + /// * `targetBlocks` - The desired number of blocks (must be >= current block count) + /// + /// # Returns + /// + /// A new LongAttribute padded to the target number of blocks + /// + /// # Errors + /// + /// Throws an error if the current number of blocks exceeds the target + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, target_blocks: usize) -> Result { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| JsError::new(&format!("Padding failed: {e}"))) + } + /// Get the underlying attributes. #[wasm_bindgen(getter)] pub fn attributes(&self) -> Vec { diff --git a/src/lib/data/wasm/records.rs b/src/lib/data/wasm/records.rs index 1ba7fa7..a1664cb 100644 --- a/src/lib/data/wasm/records.rs +++ b/src/lib/data/wasm/records.rs @@ -7,7 +7,7 @@ use crate::data::wasm::simple::{ use wasm_bindgen::prelude::*; #[cfg(feature = "long")] -use crate::data::records::{LongEncryptedRecord, LongRecord}; +use crate::data::records::{LongEncryptedRecord, LongRecord, LongRecordStructure}; #[cfg(feature = "long")] use crate::data::wasm::long::{ WASMLongAttribute, WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym, WASMLongPseudonym, @@ -156,6 +156,45 @@ impl WASMLongRecord { pub fn attributes(&self) -> Vec { self.attributes.clone() } + + /// Get the structure of this LongRecord. + /// + /// # Returns + /// + /// A LongRecordStructure describing the number of blocks in each pseudonym and attribute + #[wasm_bindgen] + pub fn structure(&self) -> WASMLongRecordStructure { + let rust_record: LongRecord = self.clone().into(); + WASMLongRecordStructure(rust_record.structure()) + } + + /// Pads this LongRecord to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// each pseudonym and attribute to ensure all records have the same structure. + /// This is necessary for batch transcryption where all values must have identical + /// structure to prevent linkability attacks. + /// + /// # Arguments + /// + /// * `structure` - The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// A padded LongRecord with padding blocks added where necessary + /// + /// # Errors + /// + /// Throws an error if the number of pseudonyms/attributes doesn't match the structure + /// or if any field exceeds its target size + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, structure: &WASMLongRecordStructure) -> Result { + let rust_record: LongRecord = self.clone().into(); + rust_record + .pad_to(&structure.0) + .map(WASMLongRecord::from) + .map_err(|e| JsValue::from_str(&format!("Padding failed: {e}"))) + } } #[cfg(feature = "long")] @@ -250,3 +289,38 @@ impl From for LongEncryptedRecord { ) } } + +#[cfg(feature = "long")] +/// Structure descriptor for LongRecords - describes the shape including block counts. +#[wasm_bindgen(js_name = LongRecordStructure)] +pub struct WASMLongRecordStructure(pub(crate) LongRecordStructure); + +#[cfg(feature = "long")] +#[wasm_bindgen(js_class = LongRecordStructure)] +impl WASMLongRecordStructure { + /// Create a new LongRecordStructure with block counts for pseudonyms and attributes. 
+ /// + /// # Arguments + /// + /// * `pseudonymBlocks` - Array of block counts for each pseudonym + /// * `attributeBlocks` - Array of block counts for each attribute + #[wasm_bindgen(constructor)] + pub fn new(pseudonym_blocks: Vec, attribute_blocks: Vec) -> Self { + WASMLongRecordStructure(LongRecordStructure { + pseudonym_blocks, + attribute_blocks, + }) + } + + /// Get the block counts for pseudonyms. + #[wasm_bindgen(getter, js_name = pseudonymBlocks)] + pub fn pseudonym_blocks(&self) -> Vec { + self.0.pseudonym_blocks.clone() + } + + /// Get the block counts for attributes. + #[wasm_bindgen(getter, js_name = attributeBlocks)] + pub fn attribute_blocks(&self) -> Vec { + self.0.attribute_blocks.clone() + } +} From 054e8af53e1831ae6abbed800fd1e28f5d934119 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 11:54:12 +0100 Subject: [PATCH 02/11] Change external padding scheme --- src/lib/data/json/data.rs | 2 - src/lib/data/json/macros.rs | 7 +- src/lib/data/json/structure.rs | 2 +- src/lib/data/long.rs | 383 +++++++++++++++++++++++++++------ src/lib/data/records.rs | 12 +- 5 files changed, 329 insertions(+), 77 deletions(-) diff --git a/src/lib/data/json/data.rs b/src/lib/data/json/data.rs index 485256f..2a922be 100644 --- a/src/lib/data/json/data.rs +++ b/src/lib/data/json/data.rs @@ -1373,8 +1373,6 @@ mod tests { #[test] #[cfg(feature = "long")] fn normalize_nested_objects_different_string_sizes() { - use super::super::structure::JSONStructure; - let mut rng = rand::rng(); let keys = make_test_keys(); diff --git a/src/lib/data/json/macros.rs b/src/lib/data/json/macros.rs index 805a1b4..7d83f13 100644 --- a/src/lib/data/json/macros.rs +++ b/src/lib/data/json/macros.rs @@ -32,12 +32,13 @@ macro_rules! pep_json { // Entry point for standalone pseudonym (pseudonym($value:expr)) => {{ - let s = $value; + let s = &$value; + let s_str: &str = s.as_ref(); // Always try short first, then fall back to long if needed - match $crate::data::simple::Pseudonym::from_string_padded(s) { + match $crate::data::simple::Pseudonym::from_string_padded(s_str) { Ok(pseudo) => $crate::data::json::data::PEPJSONValue::Pseudonym(pseudo), Err(_) => $crate::data::json::data::PEPJSONValue::LongPseudonym( - $crate::data::long::LongPseudonym::from_string_padded(s) + $crate::data::long::LongPseudonym::from_string_padded(s_str) ) } }}; diff --git a/src/lib/data/json/structure.rs b/src/lib/data/json/structure.rs index 6149dce..cd8f9f3 100644 --- a/src/lib/data/json/structure.rs +++ b/src/lib/data/json/structure.rs @@ -548,7 +548,7 @@ mod tests { #[test] fn unify_single_structure() { let s = JSONStructure::String(5); - let unified = unify_structures(&[s.clone()]).unwrap(); + let unified = unify_structures(std::slice::from_ref(&s)).unwrap(); assert_eq!(unified, s); } diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index 2f6e273..e1e11e5 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -13,20 +13,60 @@ //! automatically during encoding/decoding: //! - Ensures data fills complete 16-byte blocks //! - The padding byte value indicates the number of padding bytes -//! - Example: "hello" (5 bytes) → `68 65 6C 6C 6F 0B 0B 0B 0B 0B 0B 0B 0B 0B 0B 0B` +//! - Always applied, even if data is exactly a multiple of 16 bytes +//! +//! **Examples:** +//! +//! ```text +//! "hello" (5 bytes): +//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] +//! └─ data ─┘ └──────────────── 11 padding bytes ────────────────────┘ +//! 
(padding value = 11 because we need 11 bytes to fill the block) +//! +//! "0123456789ABCDEF" (exactly 16 bytes): +//! Block 1: [0 1 2 3 4 5 6 7 8 9 A B C D E F] ← full data block +//! Block 2: [0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10] +//! └──────────────────── full padding block (16 bytes) ───────────────────────────┘ +//! +//! "hello world!" (12 bytes): +//! Block 1: [h e l l o w o r l d ! | 0x04 0x04 0x04 0x04] +//! └────────── data ──────────┘ └── 4 padding bytes ──┘ +//! ``` //! //! ## 2. External Padding (for Batch Unlinkability) //! //! External padding adds **full blocks** to ensure all values in a batch have identical //! structure, which is required for unlinkable batch transcryption: -//! - Added using the `pad_to(n)` method -//! - Each padding block contains all bytes = 0x10 (for 16-byte blocks) +//! - Added using the `pad_to(n)` method (optional, not automatic) +//! - Each padding block contains a magic marker and metadata about the original data size +//! - Format: `[0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes u64 LE), 0x00, 0x00, 0x00, 0x00]` +//! - The magic marker is impossible in PKCS#7 (valid padding bytes are 0x01-0x10), making it unambiguous //! - Automatically detected and removed during decoding -//! - Example: A 1-block string padded to 3 blocks → [data block][0x10×16][0x10×16] +//! +//! **Example:** Padding "hello" from 1 block to 3 blocks: +//! +//! ```text +//! Original (1 block): +//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] +//! +//! After pad_to(3): +//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] ← original data +//! Block 2: [0xFF 0xEE 0xDD 0xCC | 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 | 0x00 0x00 0x00 0x00] +//! └─── magic marker ──┘ └─── original_count=1 (u64 LE) ────┘ └── zeros ──┘ +//! Block 3: [0xFF 0xEE 0xDD 0xCC | 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 | 0x00 0x00 0x00 0x00] +//! └────────────────── external padding block (same pattern) ──────────────────────┘ +//! ``` //! //! **Why external padding?** In batch transcryption, all values must have identical structure //! to prevent linkability attacks. External padding normalizes different-sized values to the //! same structure without modifying their content. +//! +//! **Batch use case:** +//! ```text +//! User A: "id123" → 1 block → pad_to(3) → 3 blocks +//! User B: "user@example.com" → 2 blocks → pad_to(3) → 3 blocks +//! Both now have identical structure (3 blocks), preventing linkability during batch transcryption. +//! 
``` use crate::arithmetic::scalars::ScalarNonZero; use crate::data::simple::{ @@ -252,9 +292,9 @@ impl LongPseudonym { return Ok(self.clone()); } - // Create a full PKCS#7 padding block (all bytes = 0x10 for 16-byte blocks) - // Note: Pseudonym also uses 16-byte blocks like Attribute - let padding_block = Pseudonym::from_lizard(&[0x10; 16]); + // Create external padding blocks with metadata about the original block count + let padding_pattern = create_external_padding_block(current_blocks); + let padding_block = Pseudonym::from_lizard(&padding_pattern); let mut blocks = self.0.clone(); blocks.resize(target_blocks, padding_block); @@ -387,8 +427,9 @@ impl LongAttribute { return Ok(self.clone()); } - // Create a full PKCS#7 padding block (all bytes = 0x10 for 16-byte blocks) - let padding_block = Attribute::from_lizard(&[0x10; 16]); + // Create external padding blocks with metadata about the original block count + let padding_pattern = create_external_padding_block(current_blocks); + let padding_block = Attribute::from_lizard(&padding_pattern); let mut blocks = self.0.clone(); blocks.resize(target_blocks, padding_block); @@ -1034,45 +1075,74 @@ fn from_bytes_padded_impl(data: &[u8]) -> Vec { result } -/// Helper function to check if a block is a full PKCS#7 padding block (external padding). -/// -/// A full padding block has all bytes equal to the block size (0x10 for 16-byte blocks, -/// 0x20 for 32-byte blocks). These are used by `pad_to()` for batch processing. -fn is_external_padding_block(block: &[u8]) -> bool { - if block.is_empty() { - return false; - } +/// Magic marker for external padding blocks. +/// This sequence is impossible in valid PKCS#7 padding for 16-byte blocks +/// (valid padding bytes are 0x01-0x10, so 0xFF is impossible). +const EXTERNAL_PADDING_MAGIC: [u8; 4] = [0xFF, 0xEE, 0xDD, 0xCC]; - let expected_padding = block.len() as u8; - block.iter().all(|&b| b == expected_padding) +/// Creates an external padding block with metadata about the original block count. +/// +/// Format: [0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes as u64 LE), 0x00, 0x00, 0x00, 0x00] +/// +/// The magic marker (0xFF, 0xEE, 0xDD, 0xCC) is impossible in PKCS#7 padding for 16-byte blocks, +/// making this unambiguous. The original block count allows the decoder to know exactly where +/// the real data ends and external padding begins. +fn create_external_padding_block(original_block_count: usize) -> [u8; 16] { + let mut block = [0u8; 16]; + block[0..4].copy_from_slice(&EXTERNAL_PADDING_MAGIC); + block[4..12].copy_from_slice(&(original_block_count as u64).to_le_bytes()); + // Remaining 4 bytes stay as 0x00 + block } -/// Helper to check if a block has valid PKCS#7 padding. -fn has_valid_pkcs7_padding(block: &[u8]) -> bool { +/// Checks if a block is an external padding block and extracts the original block count. +/// +/// Returns `Some(original_block_count)` if this is an external padding block, +/// or `None` if this is a regular data block. 
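+///
+/// Illustrative round-trip of these two helpers (a sketch, not a doctest, since both
+/// functions are private to this module):
+///
+/// ```ignore
+/// let block = create_external_padding_block(3);
+/// assert_eq!(is_external_padding_block(&block), Some(3));
+/// assert_eq!(is_external_padding_block(&[0x0B; 16]), None);
+/// ```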
+/// +/// # Disambiguation Guarantee +/// +/// This function **guarantees 100% accurate detection** with no false positives: +/// +/// - External padding blocks **always** end with `[0x00, 0x00, 0x00, 0x00]` +/// - PKCS#7 data blocks **never** have `0x00` in the last byte (valid padding: `0x01`-`0x10`) +/// +/// This means **ALL possible byte sequences can be encoded without ambiguity**, including: +/// - Data starting with the magic marker `[0xFF, 0xEE, 0xDD, 0xCC]` +/// - Data exactly matching the external padding pattern +/// - Any combination of bytes whatsoever +fn is_external_padding_block(block: &[u8]) -> Option { if block.len() != 16 { - return false; + return None; } - let padding_byte = block[15]; + // Check magic marker + if block[0..4] != EXTERNAL_PADDING_MAGIC { + return None; + } - // Padding must be between 1 and 16 - if padding_byte == 0 || padding_byte > 16 { - return false; + // Check that last 4 bytes are zeros (as created by create_external_padding_block) + // This prevents false positives where legitimate data happens to start with the magic marker + if block[12..16] != [0x00, 0x00, 0x00, 0x00] { + return None; } - // All padding bytes must have the same value - block[16 - padding_byte as usize..] - .iter() - .all(|&b| b == padding_byte) + // Extract original block count + let count_bytes: [u8; 8] = match block[4..12].try_into() { + Ok(bytes) => bytes, + Err(_) => return None, + }; + + Some(u64::from_le_bytes(count_bytes) as usize) } /// Internal helper function to decode padded bytes. /// -/// This function automatically detects and stops at external padding blocks +/// This function automatically detects and skips external padding blocks /// created by `pad_to()`, ensuring that normalized values decode correctly. /// -/// External padding blocks (all bytes = 0x10) are distinguished from legitimate -/// full PKCS#7 padding blocks by checking if the previous block has valid padding. +/// External padding uses a magic marker `[0xFF, 0xEE, 0xDD, 0xCC, ...]` which is +/// impossible in valid PKCS#7 padding (valid padding bytes are 0x01-0x10). 
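+///
+/// Observable effect through the public API (an illustrative sketch, not a doctest):
+///
+/// ```ignore
+/// let padded = LongAttribute::from_string_padded("hello").pad_to(3)?;
+/// // The two external padding blocks are excluded; only the data block is decoded.
+/// assert_eq!(padded.to_string_padded()?, "hello");
+/// ```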
fn to_bytes_padded_impl(items: &[T]) -> Result, Error> { if items.is_empty() { return Err(Error::new( @@ -1081,46 +1151,30 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E )); } - // Find the last non-external-padding block - let mut last_data_block_idx = items.len() - 1; - - for i in (0..items.len()).rev() { - let block = items[i].to_lizard().ok_or(Error::new( + // Check if there are external padding blocks at the end + // If so, extract the original block count from them + let last_data_block_idx = if !items.is_empty() { + let last_block = items[items.len() - 1].to_lizard().ok_or(Error::new( ErrorKind::InvalidData, "Encryptable conversion to bytes failed", ))?; - // Check if this looks like an external padding block - if is_external_padding_block(&block) { - // It looks like external padding, but we need to check if it's actually - // legitimate PKCS#7 padding for the previous block - if i > 0 { - let prev_block = items[i - 1].to_lizard().ok_or(Error::new( + if let Some(original_count) = is_external_padding_block(&last_block) { + // We have external padding, use the encoded original block count + if original_count > items.len() { + return Err(Error::new( ErrorKind::InvalidData, - "Encryptable conversion to bytes failed", - ))?; - - // If the previous block doesn't have valid PKCS#7 padding, - // it means it's a full data block and this "external padding" - // is actually legitimate PKCS#7 padding - if !has_valid_pkcs7_padding(&prev_block) { - // This is legitimate PKCS#7 padding, not external padding - last_data_block_idx = i; - break; - } - // Otherwise, this is external padding, continue scanning backwards - } else { - // First block looks like external padding - it must be legitimate - // (e.g., empty string encoded) - last_data_block_idx = 0; - break; + "External padding contains invalid block count", + )); } + original_count - 1 // -1 because we use it as an index } else { - // Found a non-external-padding block - last_data_block_idx = i; - break; + // No external padding, last block is the last data block + items.len() - 1 } - } + } else { + return Err(Error::new(ErrorKind::InvalidData, "Empty data")); + }; let mut result = Vec::with_capacity((last_data_block_idx + 1) * 16); @@ -1612,4 +1666,203 @@ mod tests { let decoded = padded.to_string_padded().unwrap(); assert_eq!(str_with_nulls, decoded); } + + #[test] + fn long_attribute_data_ending_with_full_0x10_block() { + // Regression test for external padding detection: + // Plaintext containing a full block of 0x10 bytes should roundtrip correctly. + // After PKCS#7 encoding, this becomes [0x10×16][0x10×16] (data block + padding block). + // The decoder must correctly identify the second block as legitimate PKCS#7 padding, + // not external padding added by pad_to(). + let data = vec![0x10u8; 16]; + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Data ending with full 0x10 block should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_ascending_sequence_data() { + // Regression test: ensure data containing ascending sequence [0,1,2,...,15] + // can be encoded and decoded correctly even though it looks like a pattern. 
+ let data: Vec = (0..16).collect(); + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Ascending sequence data should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_pad_to_preserves_data() { + // Test that pad_to correctly preserves data when adding external padding + let attr = LongAttribute::from_string_padded("hello"); + let original_len = attr.len(); + + let padded = attr.pad_to(original_len + 2).unwrap(); + assert_eq!( + padded.len(), + original_len + 2, + "Padded length should match target" + ); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(decoded, "hello", "pad_to should preserve original data"); + } + + #[test] + fn long_pseudonym_pad_to_preserves_data() { + // Test that pad_to correctly preserves data for pseudonyms + let pseudo = LongPseudonym::from_string_padded("test-user-id"); + let original_len = pseudo.len(); + + let padded = pseudo.pad_to(original_len + 3).unwrap(); + assert_eq!( + padded.len(), + original_len + 3, + "Padded length should match target" + ); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!( + decoded, "test-user-id", + "pad_to should preserve original data" + ); + } + + #[test] + fn long_attribute_data_containing_magic_marker_multiblock() { + // Edge case: Data that contains the magic marker bytes [0xFF, 0xEE, 0xDD, 0xCC] + // CAN be encoded if it spans multiple blocks (>16 bytes). + // The magic marker check only applies to the last block. + let data = vec![ + 0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00, + 0xAA, 0xBB, 0xCC, + ]; + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Multi-block data containing magic marker should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_single_block_with_magic_marker() { + // Edge case: Data starting with magic marker CAN now be encoded + // because we check that the last 4 bytes are zeros. + // + // After PKCS#7: [0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08] + // Last 4 bytes are [0x08, 0x08, 0x08, 0x08], not [0x00, 0x00, 0x00, 0x00] + // So this is NOT detected as external padding and roundtrips correctly. + let data = vec![0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66]; + let attr = LongAttribute::from_bytes_padded(&data); + + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Single-block data starting with magic marker should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_data_exactly_matching_external_padding_pattern() { + // Edge case: Data that exactly matches our external padding block format. + // After PKCS#7 encoding, this should still work correctly. + let data = vec![ + 0xFF, 0xEE, 0xDD, 0xCC, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + let attr = LongAttribute::from_bytes_padded(&data); + + // After PKCS#7: will be 2 blocks: [data][full padding block] + // The last block will be [0x10×16], not the magic marker + assert_eq!(attr.len(), 2); + + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Data matching external padding pattern should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_double_pad_to_fails() { + // Edge case: Calling pad_to() multiple times should fail to decode + // because the decoder will find nested external padding blocks. 
+ let attr = LongAttribute::from_string_padded("test"); + let padded_once = attr.pad_to(2).unwrap(); + let padded_twice = padded_once.pad_to(3).unwrap(); + + // This should fail because block 1 (external padding with count=1) + // doesn't have valid PKCS#7 padding + let result = padded_twice.to_string_padded(); + assert!(result.is_err(), "Double pad_to should fail to decode"); + } + + #[test] + fn verify_no_ambiguous_edge_cases() { + // Comprehensive verification that ALL data can be encoded without ambiguity + // This test confirms the fix for the edge case where single-block data + // starting with the magic marker [0xFF, 0xEE, 0xDD, 0xCC] previously failed. + + // Test 1: Single-block data starting with magic marker (previously failed) + let data1 = vec![0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66]; + let attr1 = LongAttribute::from_bytes_padded(&data1); + let decoded1 = attr1.to_bytes_padded().unwrap(); + assert_eq!( + data1, decoded1, + "Single-block data with magic marker should work" + ); + + // Test 2: Exact external padding pattern as data (with PKCS#7, last block differs) + let data2 = vec![ + 0xFF, 0xEE, 0xDD, 0xCC, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + let attr2 = LongAttribute::from_bytes_padded(&data2); + let decoded2 = attr2.to_bytes_padded().unwrap(); + assert_eq!( + data2, decoded2, + "Data matching external padding pattern should work" + ); + + // Test 3: Magic marker in middle of multi-block data + let data3 = vec![ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xFF, 0xEE, 0xDD, 0xCC, 0x11, 0x12, + 0x13, 0x14, 0x15, 0x16, + ]; + let attr3 = LongAttribute::from_bytes_padded(&data3); + let decoded3 = attr3.to_bytes_padded().unwrap(); + assert_eq!(data3, decoded3, "Magic marker in middle should work"); + + // Test 4: pad_to() still works correctly + let attr4 = LongAttribute::from_string_padded("hello"); + let padded = attr4.pad_to(3).unwrap(); + assert_eq!( + padded.len(), + 3, + "pad_to should create correct number of blocks" + ); + let decoded4 = padded.to_string_padded().unwrap(); + assert_eq!("hello", decoded4, "pad_to should preserve original data"); + + // Test 5: Various lengths starting with magic marker + for len in 1..=32 { + let mut data = vec![0xFF, 0xEE, 0xDD, 0xCC]; + data.extend(vec![0xAB; len]); + + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, + decoded, + "Data of length {} starting with magic marker should work", + len + 4 + ); + } + } } diff --git a/src/lib/data/records.rs b/src/lib/data/records.rs index 1a59a58..15d34e1 100644 --- a/src/lib/data/records.rs +++ b/src/lib/data/records.rs @@ -232,24 +232,24 @@ impl LongRecord { } // Pad pseudonyms - let padded_pseudonyms: Result, _> = self + let padded_pseudonyms: Vec<_> = self .pseudonyms .iter() .zip(structure.pseudonym_blocks.iter()) .map(|(p, &target_blocks)| p.pad_to(target_blocks)) - .collect(); + .collect::>()?; // Pad attributes - let padded_attributes: Result, _> = self + let padded_attributes: Vec<_> = self .attributes .iter() .zip(structure.attribute_blocks.iter()) .map(|(a, &target_blocks)| a.pad_to(target_blocks)) - .collect(); + .collect::>()?; Ok(LongRecord { - pseudonyms: padded_pseudonyms?, - attributes: padded_attributes?, + pseudonyms: padded_pseudonyms, + attributes: padded_attributes, }) } From 88d9942ad7342f78aac72c61f1fc4e8524cf61f6 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 12:06:21 +0100 Subject: [PATCH 03/11] Fix tests --- 
src/lib/client/wasm/types.rs | 12 +++++++++--- src/lib/data/long.rs | 8 ++++---- src/lib/data/wasm/long.rs | 4 ++++ src/lib/data/wasm/records.rs | 14 ++++++++++++-- src/lib/transcryptor/wasm/distributed.rs | 3 ++- src/lib/transcryptor/wasm/types.rs | 3 ++- 6 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/lib/client/wasm/types.rs b/src/lib/client/wasm/types.rs index 50f569a..7b78072 100644 --- a/src/lib/client/wasm/types.rs +++ b/src/lib/client/wasm/types.rs @@ -2,21 +2,27 @@ #[cfg(feature = "offline")] use crate::client::OfflineClient; -#[cfg(feature = "json")] +#[cfg(all(feature = "offline", feature = "json"))] use crate::data::wasm::json::{WASMEncryptedPEPJSONValue, WASMPEPJSONValue}; -#[cfg(feature = "long")] +#[cfg(all(feature = "offline", feature = "long"))] use crate::data::wasm::long::{ WASMLongAttribute, WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym, WASMLongPseudonym, }; -#[cfg(feature = "long")] +#[cfg(all(feature = "offline", feature = "long"))] use crate::data::wasm::records::{WASMLongRecord, WASMLongRecordEncrypted}; +#[cfg(feature = "offline")] use crate::data::wasm::records::{WASMRecord, WASMRecordEncrypted}; +#[cfg(feature = "offline")] use crate::data::wasm::simple::{ WASMAttribute, WASMEncryptedAttribute, WASMEncryptedPseudonym, WASMPseudonym, }; +#[cfg(feature = "offline")] use crate::keys::wasm::types::WASMGlobalPublicKeys; +#[cfg(feature = "offline")] use crate::keys::*; +#[cfg(feature = "offline")] use derive_more::{Deref, From, Into}; +#[cfg(feature = "offline")] use wasm_bindgen::prelude::*; /// An offline PEP client. diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index e1e11e5..32474ed 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -79,9 +79,11 @@ use crate::factors::{ AttributeRekeyInfo, PseudonymRekeyInfo, PseudonymizationInfo, RerandomizeFactor, }; use crate::keys::{ - AttributeGlobalPublicKey, AttributeSessionPublicKey, AttributeSessionSecretKey, - PseudonymGlobalPublicKey, PseudonymSessionPublicKey, PseudonymSessionSecretKey, + AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, + PseudonymSessionSecretKey, }; +#[cfg(feature = "offline")] +use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; use derive_more::{Deref, From}; use rand_core::{CryptoRng, Rng}; #[cfg(feature = "serde")] @@ -1102,8 +1104,6 @@ fn create_external_padding_block(original_block_count: usize) -> [u8; 16] { /// /// # Disambiguation Guarantee /// -/// This function **guarantees 100% accurate detection** with no false positives: -/// /// - External padding blocks **always** end with `[0x00, 0x00, 0x00, 0x00]` /// - PKCS#7 data blocks **never** have `0x00` in the last byte (valid padding: `0x01`-`0x10`) /// diff --git a/src/lib/data/wasm/long.rs b/src/lib/data/wasm/long.rs index a8ac901..da8393c 100644 --- a/src/lib/data/wasm/long.rs +++ b/src/lib/data/wasm/long.rs @@ -284,9 +284,13 @@ impl WASMLongEncryptedAttribute { } /// WASM bindings for batch operations on long (multi-block) data types. 
+#[cfg(feature = "batch")] use crate::data::records::LongEncryptedRecord; +#[cfg(feature = "batch")] use crate::factors::wasm::contexts::WASMTranscryptionInfo; +#[cfg(feature = "batch")] use crate::factors::wasm::types::WASMPseudonymRekeyFactor; +#[cfg(feature = "batch")] use crate::factors::TranscryptionInfo; #[cfg(feature = "batch")] use crate::transcryptor::{rekey_batch, transcrypt_batch}; diff --git a/src/lib/data/wasm/records.rs b/src/lib/data/wasm/records.rs index a1664cb..029af59 100644 --- a/src/lib/data/wasm/records.rs +++ b/src/lib/data/wasm/records.rs @@ -164,7 +164,7 @@ impl WASMLongRecord { /// A LongRecordStructure describing the number of blocks in each pseudonym and attribute #[wasm_bindgen] pub fn structure(&self) -> WASMLongRecordStructure { - let rust_record: LongRecord = self.clone().into(); + let rust_record: LongRecord = self.into(); WASMLongRecordStructure(rust_record.structure()) } @@ -189,7 +189,7 @@ impl WASMLongRecord { /// or if any field exceeds its target size #[wasm_bindgen(js_name = padTo)] pub fn pad_to(&self, structure: &WASMLongRecordStructure) -> Result { - let rust_record: LongRecord = self.clone().into(); + let rust_record: LongRecord = self.into(); rust_record .pad_to(&structure.0) .map(WASMLongRecord::from) @@ -207,6 +207,16 @@ impl From for LongRecord { } } +#[cfg(feature = "long")] +impl From<&WASMLongRecord> for LongRecord { + fn from(record: &WASMLongRecord) -> Self { + LongRecord::new( + record.pseudonyms.iter().map(|p| p.0.clone()).collect(), + record.attributes.iter().map(|a| a.0.clone()).collect(), + ) + } +} + #[cfg(feature = "long")] impl From for WASMLongRecord { fn from(record: LongRecord) -> Self { diff --git a/src/lib/transcryptor/wasm/distributed.rs b/src/lib/transcryptor/wasm/distributed.rs index 915601f..d91f5be 100644 --- a/src/lib/transcryptor/wasm/distributed.rs +++ b/src/lib/transcryptor/wasm/distributed.rs @@ -1,7 +1,8 @@ //! WASM bindings for distributed transcryptor. -#[cfg(feature = "long")] +#[cfg(all(feature = "long", feature = "batch"))] use crate::data::long::{LongEncryptedAttribute, LongEncryptedPseudonym}; +#[cfg(feature = "batch")] use crate::data::simple::{EncryptedAttribute, EncryptedPseudonym}; #[cfg(feature = "long")] use crate::data::wasm::long::{WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym}; diff --git a/src/lib/transcryptor/wasm/types.rs b/src/lib/transcryptor/wasm/types.rs index b7b6a53..630e4d7 100644 --- a/src/lib/transcryptor/wasm/types.rs +++ b/src/lib/transcryptor/wasm/types.rs @@ -1,7 +1,8 @@ //! WASM bindings for transcryptor types. -#[cfg(feature = "long")] +#[cfg(all(feature = "long", feature = "batch"))] use crate::data::long::{LongEncryptedAttribute, LongEncryptedPseudonym}; +#[cfg(feature = "batch")] use crate::data::simple::{EncryptedAttribute, EncryptedPseudonym}; #[cfg(feature = "long")] use crate::data::wasm::long::{WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym}; From 5c9be5f26ca7c87f58992e7e17731d88fa038808 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 12:08:12 +0100 Subject: [PATCH 04/11] Update documentation --- src/lib/data/json/data.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/lib/data/json/data.rs b/src/lib/data/json/data.rs index 2a922be..c619110 100644 --- a/src/lib/data/json/data.rs +++ b/src/lib/data/json/data.rs @@ -233,13 +233,16 @@ impl PEPJSONValue { /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. 
/// - /// This method adds external padding blocks (separate from PKCS#7 padding) to - /// `LongString` and `LongPseudonym` variants to ensure all instances have the same - /// number of blocks when encrypted. This is necessary for batch transcryption where - /// all values must have identical structure. + /// This method adds external padding blocks (separate from any internal PKCS#7-style + /// padding used inside individual ciphertext blocks) to `LongString` and + /// `LongPseudonym` variants to ensure all instances have the same number of blocks + /// when encrypted. This is necessary for batch transcryption where all encrypted values must + /// have identical structure to prevent linkability. /// - /// The padding uses full PKCS#7 padding blocks which are automatically detected and - /// stripped during decoding, ensuring the original values are perfectly preserved. + /// The external padding blocks are special marker blocks that contain a magic byte + /// pattern (for example `[0xFF, 0xEE, 0xDD, 0xCC, ...]`) and no user data. These + /// marker blocks are automatically detected as padding and stripped during decoding, + /// ensuring the original values are perfectly preserved. /// /// # Parameters /// From e20faf84299566205b0bc8a8b5104bf9c2862018 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 12:09:46 +0100 Subject: [PATCH 05/11] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/lib/data/long.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index 32474ed..fa99d3b 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -239,7 +239,9 @@ impl LongPseudonym { /// /// ## How it Works /// - /// - Adds full blocks of `0x10` (external padding) after the data blocks + /// - Appends one or more **external padding blocks** after the data blocks + /// using a magic marker pattern: + /// `[0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes u64 LE), 0x00, 0x00, 0x00, 0x00]` /// - These blocks are **separate from** the internal PKCS#7 padding within blocks /// - External padding is automatically detected and removed during decoding /// - The original pseudonym value is perfectly preserved @@ -384,8 +386,9 @@ impl LongAttribute { /// Pads this `LongAttribute` to a target number of blocks. /// /// This is useful for batch operations where all attributes must have the same structure. - /// The padding blocks are full PKCS#7 padding blocks (all bytes = 0x10) which are - /// automatically detected and skipped during decoding. + /// Any additional padding blocks use a special magic-marker format + /// `[0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes u64 LE), 0x00, 0x00, 0x00, 0x00]` + /// and are automatically detected and skipped during decoding. 
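+    ///
+    /// Illustrative sketch (not a doctest) of normalizing two attributes of different
+    /// lengths before batch processing:
+    ///
+    /// ```ignore
+    /// let a = LongAttribute::from_string_padded("short");
+    /// let b = LongAttribute::from_string_padded("a much longer attribute value");
+    /// let target = a.len().max(b.len());
+    /// let a = a.pad_to(target)?;
+    /// let b = b.pad_to(target)?;
+    /// assert_eq!(a.len(), b.len());
+    /// assert_eq!(a.to_string_padded()?, "short");
+    /// ```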
/// /// # Parameters /// From a2cc4de7b6888b457d0c87c5dea32cf40770f6c4 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 12:13:31 +0100 Subject: [PATCH 06/11] Fix bug --- src/lib/data/long.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index fa99d3b..3995f89 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -78,12 +78,12 @@ use crate::factors::TranscryptionInfo; use crate::factors::{ AttributeRekeyInfo, PseudonymRekeyInfo, PseudonymizationInfo, RerandomizeFactor, }; +#[cfg(feature = "offline")] +use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; use crate::keys::{ AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, PseudonymSessionSecretKey, }; -#[cfg(feature = "offline")] -use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; use derive_more::{Deref, From}; use rand_core::{CryptoRng, Rng}; #[cfg(feature = "serde")] @@ -1164,7 +1164,8 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E if let Some(original_count) = is_external_padding_block(&last_block) { // We have external padding, use the encoded original block count - if original_count > items.len() { + // Check for underflow: original_count must be at least 1 + if original_count == 0 || original_count > items.len() { return Err(Error::new( ErrorKind::InvalidData, "External padding contains invalid block count", From 5f82bb1fc74653c4c92b5e6dfd489484836e4514 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 13:51:02 +0100 Subject: [PATCH 07/11] Improvements --- Cargo.toml | 2 +- package.json | 2 +- src/lib/data/json/macros.rs | 54 ++++++- src/lib/data/long.rs | 126 +-------------- src/lib/data/padding.rs | 303 ------------------------------------ 5 files changed, 57 insertions(+), 430 deletions(-) delete mode 100644 src/lib/data/padding.rs diff --git a/Cargo.toml b/Cargo.toml index 5cb4afa..be2aad6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libpep" edition = "2021" -version = "0.11.1" +version = "0.12.0" authors = ["Bernard van Gastel ", "Job Doesburg "] homepage = "https://github.com/NOLAI/libpep" repository = "https://github.com/NOLAI/libpep" diff --git a/package.json b/package.json index 239109f..278e549 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@nolai/libpep-wasm", - "version": "0.11.1", + "version": "0.12.0", "description": "Library for polymorphic encryption and pseudonymization (in WASM)", "repository": { "type": "git", diff --git a/src/lib/data/json/macros.rs b/src/lib/data/json/macros.rs index 7d83f13..396f422 100644 --- a/src/lib/data/json/macros.rs +++ b/src/lib/data/json/macros.rs @@ -32,8 +32,9 @@ macro_rules! pep_json { // Entry point for standalone pseudonym (pseudonym($value:expr)) => {{ - let s = &$value; - let s_str: &str = s.as_ref(); + use $crate::data::padding::Padded; + let value = $value; + let s_str: &str = value.as_ref(); // Always try short first, then fall back to long if needed match $crate::data::simple::Pseudonym::from_string_padded(s_str) { Ok(pseudo) => $crate::data::json::data::PEPJSONValue::Pseudonym(pseudo), @@ -56,12 +57,14 @@ macro_rules! 
pep_json { // Pseudonym field (last field, no trailing comma) (@object $builder:ident, $key:literal : pseudonym($value:expr)) => {{ - $builder.pseudonym($key, $value).build() + let value = $value; + $builder.pseudonym($key, value.as_ref()).build() }}; // Pseudonym field with more fields following (@object $builder:ident, $key:literal : pseudonym($value:expr), $($rest:tt)*) => {{ - let builder = $builder.pseudonym($key, $value); + let value = $value; + let builder = $builder.pseudonym($key, value.as_ref()); pep_json!(@object builder, $($rest)*) }}; @@ -258,4 +261,47 @@ mod tests { assert_eq!(expected, decrypted.to_value().unwrap()); } + + #[test] + fn macro_with_string_variables() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Test with String variables (not just string literals) + let user_id = String::from("user@example.com"); + let pep_value = pep_json!({ + "id": pseudonym(user_id) + }); + + let encrypted = encrypt(&pep_value, &keys, &mut rng); + #[cfg(feature = "elgamal3")] + let decrypted = decrypt(&encrypted, &keys).unwrap(); + + #[cfg(not(feature = "elgamal3"))] + let decrypted = decrypt(&encrypted, &keys); + + let expected = json!({ + "id": "user@example.com" + }); + + assert_eq!(expected, decrypted.to_value().unwrap()); + } + + #[test] + fn macro_standalone_pseudonym_with_string() { + // Test standalone pseudonym with String variable + let user_id = String::from("test@example.com"); + let pep_value = pep_json!(pseudonym(user_id)); + + // Verify it creates the correct variant + match pep_value { + crate::data::json::data::PEPJSONValue::Pseudonym(_) => { + // Expected for short pseudonyms + } + crate::data::json::data::PEPJSONValue::LongPseudonym(_) => { + // Also acceptable if string is long + } + _ => panic!("Expected Pseudonym or LongPseudonym variant"), + } + } } diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index 3995f89..eead13a 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -3,72 +3,15 @@ //! This module provides support for multi-block pseudonyms and attributes that can hold //! more than 16 bytes of data. //! -//! # Two Types of Padding +//! # Padding //! -//! This module handles **two distinct types of padding**: +//! Long data types use PKCS#7 padding (internal padding) automatically for the last block. +//! They also support optional external padding via the `pad_to()` method for batch unlinkability. //! -//! ## 1. Internal Padding (PKCS#7) -//! -//! Standard PKCS#7 padding is applied **within** the last data block. This is handled -//! automatically during encoding/decoding: -//! - Ensures data fills complete 16-byte blocks -//! - The padding byte value indicates the number of padding bytes -//! - Always applied, even if data is exactly a multiple of 16 bytes -//! -//! **Examples:** -//! -//! ```text -//! "hello" (5 bytes): -//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] -//! └─ data ─┘ └──────────────── 11 padding bytes ────────────────────┘ -//! (padding value = 11 because we need 11 bytes to fill the block) -//! -//! "0123456789ABCDEF" (exactly 16 bytes): -//! Block 1: [0 1 2 3 4 5 6 7 8 9 A B C D E F] ← full data block -//! Block 2: [0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10] -//! └──────────────────── full padding block (16 bytes) ───────────────────────────┘ -//! -//! "hello world!" (12 bytes): -//! Block 1: [h e l l o w o r l d ! | 0x04 0x04 0x04 0x04] -//! └────────── data ──────────┘ └── 4 padding bytes ──┘ -//! ``` -//! -//! ## 2. 
External Padding (for Batch Unlinkability) -//! -//! External padding adds **full blocks** to ensure all values in a batch have identical -//! structure, which is required for unlinkable batch transcryption: -//! - Added using the `pad_to(n)` method (optional, not automatic) -//! - Each padding block contains a magic marker and metadata about the original data size -//! - Format: `[0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes u64 LE), 0x00, 0x00, 0x00, 0x00]` -//! - The magic marker is impossible in PKCS#7 (valid padding bytes are 0x01-0x10), making it unambiguous -//! - Automatically detected and removed during decoding -//! -//! **Example:** Padding "hello" from 1 block to 3 blocks: -//! -//! ```text -//! Original (1 block): -//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] -//! -//! After pad_to(3): -//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] ← original data -//! Block 2: [0xFF 0xEE 0xDD 0xCC | 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 | 0x00 0x00 0x00 0x00] -//! └─── magic marker ──┘ └─── original_count=1 (u64 LE) ────┘ └── zeros ──┘ -//! Block 3: [0xFF 0xEE 0xDD 0xCC | 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 | 0x00 0x00 0x00 0x00] -//! └────────────────── external padding block (same pattern) ──────────────────────┘ -//! ``` -//! -//! **Why external padding?** In batch transcryption, all values must have identical structure -//! to prevent linkability attacks. External padding normalizes different-sized values to the -//! same structure without modifying their content. -//! -//! **Batch use case:** -//! ```text -//! User A: "id123" → 1 block → pad_to(3) → 3 blocks -//! User B: "user@example.com" → 2 blocks → pad_to(3) → 3 blocks -//! Both now have identical structure (3 blocks), preventing linkability during batch transcryption. -//! ``` +//! For detailed information about the two types of padding, see the [`padding`](crate::data::padding) module. use crate::arithmetic::scalars::ScalarNonZero; +use crate::data::padding::external::{create_external_padding_block, is_external_padding_block}; use crate::data::simple::{ Attribute, ElGamalEncryptable, ElGamalEncrypted, EncryptedAttribute, EncryptedPseudonym, Pseudonym, @@ -1080,65 +1023,6 @@ fn from_bytes_padded_impl(data: &[u8]) -> Vec { result } -/// Magic marker for external padding blocks. -/// This sequence is impossible in valid PKCS#7 padding for 16-byte blocks -/// (valid padding bytes are 0x01-0x10, so 0xFF is impossible). -const EXTERNAL_PADDING_MAGIC: [u8; 4] = [0xFF, 0xEE, 0xDD, 0xCC]; - -/// Creates an external padding block with metadata about the original block count. -/// -/// Format: [0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes as u64 LE), 0x00, 0x00, 0x00, 0x00] -/// -/// The magic marker (0xFF, 0xEE, 0xDD, 0xCC) is impossible in PKCS#7 padding for 16-byte blocks, -/// making this unambiguous. The original block count allows the decoder to know exactly where -/// the real data ends and external padding begins. -fn create_external_padding_block(original_block_count: usize) -> [u8; 16] { - let mut block = [0u8; 16]; - block[0..4].copy_from_slice(&EXTERNAL_PADDING_MAGIC); - block[4..12].copy_from_slice(&(original_block_count as u64).to_le_bytes()); - // Remaining 4 bytes stay as 0x00 - block -} - -/// Checks if a block is an external padding block and extracts the original block count. -/// -/// Returns `Some(original_block_count)` if this is an external padding block, -/// or `None` if this is a regular data block. 
-/// -/// # Disambiguation Guarantee -/// -/// - External padding blocks **always** end with `[0x00, 0x00, 0x00, 0x00]` -/// - PKCS#7 data blocks **never** have `0x00` in the last byte (valid padding: `0x01`-`0x10`) -/// -/// This means **ALL possible byte sequences can be encoded without ambiguity**, including: -/// - Data starting with the magic marker `[0xFF, 0xEE, 0xDD, 0xCC]` -/// - Data exactly matching the external padding pattern -/// - Any combination of bytes whatsoever -fn is_external_padding_block(block: &[u8]) -> Option { - if block.len() != 16 { - return None; - } - - // Check magic marker - if block[0..4] != EXTERNAL_PADDING_MAGIC { - return None; - } - - // Check that last 4 bytes are zeros (as created by create_external_padding_block) - // This prevents false positives where legitimate data happens to start with the magic marker - if block[12..16] != [0x00, 0x00, 0x00, 0x00] { - return None; - } - - // Extract original block count - let count_bytes: [u8; 8] = match block[4..12].try_into() { - Ok(bytes) => bytes, - Err(_) => return None, - }; - - Some(u64::from_le_bytes(count_bytes) as usize) -} - /// Internal helper function to decode padded bytes. /// /// This function automatically detects and skips external padding blocks diff --git a/src/lib/data/padding.rs b/src/lib/data/padding.rs deleted file mode 100644 index a3d4b0f..0000000 --- a/src/lib/data/padding.rs +++ /dev/null @@ -1,303 +0,0 @@ -//! PKCS#7 padding support for single-block (16 byte) encoding. -//! -//! This module provides the `Padded` trait for encoding data up to 15 bytes using PKCS#7 padding. -//! For multi-block data, see the `long` module. - -use crate::data::simple::{Attribute, ElGamalEncryptable, Pseudonym}; -use std::io::{Error, ErrorKind}; - -/// A trait for encryptable types that support PKCS#7 padding for single-block (16 byte) encoding. -pub trait Padded: ElGamalEncryptable { - /// Encodes an arbitrary byte array using PKCS#7 padding. - /// - /// # Parameters - /// - /// - `data`: The bytes to encode (must be at most 15 bytes) - /// - /// # Errors - /// - /// Returns an error if the data exceeds 15 bytes. - fn from_bytes_padded(data: &[u8]) -> Result - where - Self: Sized, - { - if data.len() > 15 { - return Err(Error::new( - ErrorKind::InvalidInput, - format!("Data too long: {} bytes (max 15)", data.len()), - )); - } - - // Create padded block using PKCS#7 padding - let padding_byte = (16 - data.len()) as u8; - let mut block = [padding_byte; 16]; - block[..data.len()].copy_from_slice(data); - - Ok(Self::from_lizard(&block)) - } - - /// Encodes a string using PKCS#7 padding. - /// - /// # Parameters - /// - /// - `text`: The string to encode (must be at most 15 bytes when UTF-8 encoded) - /// - /// # Errors - /// - /// Returns an error if the string exceeds 15 bytes. - fn from_string_padded(text: &str) -> Result - where - Self: Sized, - { - Self::from_bytes_padded(text.as_bytes()) - } - - /// Decodes back to the original string. - /// - /// # Errors - /// - /// Returns an error if: - /// - The padding is invalid - /// - The decoded bytes are not valid UTF-8 - /// - The value was not created using `from_bytes_padded` or `from_string_padded` - fn to_string_padded(&self) -> Result { - let bytes = self.to_bytes_padded()?; - String::from_utf8(bytes).map_err(|e| Error::new(ErrorKind::InvalidData, e.to_string())) - } - - /// Decodes back to the original byte array. 
- /// - /// # Errors - /// - /// Returns an error if: - /// - The padding is invalid - /// - The value was not created using `from_bytes_padded` or `from_string_padded` - fn to_bytes_padded(&self) -> Result, Error> { - let block = self.to_lizard().ok_or(Error::new( - ErrorKind::InvalidData, - "Value is not a valid padded value", - ))?; - - let padding_byte = block[15]; - - if padding_byte == 0 || padding_byte > 16 { - return Err(Error::new(ErrorKind::InvalidData, "Invalid padding")); - } - - if block[16 - padding_byte as usize..] - .iter() - .any(|&b| b != padding_byte) - { - return Err(Error::new(ErrorKind::InvalidData, "Inconsistent padding")); - } - - let data_bytes = 16 - padding_byte as usize; - Ok(block[..data_bytes].to_vec()) - } -} - -impl Padded for Pseudonym {} -impl Padded for Attribute {} - -#[cfg(test)] -#[allow(clippy::unwrap_used, clippy::expect_used)] -mod tests { - use super::*; - use std::io::ErrorKind; - - // Generic test helper functions - - fn test_from_bytes_padded_roundtrip() { - let test_cases = [ - b"" as &[u8], - b"a", - b"hello", - b"Hello, world!", - b"123456789012345", // 15 bytes (max) - ]; - - for data in test_cases { - let value = T::from_bytes_padded(data).unwrap(); - let decoded = value.to_bytes_padded().unwrap(); - assert_eq!(data, decoded.as_slice(), "Failed for input: {:?}", data); - } - } - - fn test_from_string_padded_roundtrip() { - let test_cases = ["", "a", "hello", "Hello, world!", "123456789012345"]; - - for text in test_cases { - let value = T::from_string_padded(text).unwrap(); - let decoded = value.to_string_padded().unwrap(); - assert_eq!(text, decoded.as_str(), "Failed for input: {:?}", text); - } - } - - fn test_too_long() { - let data = b"This is 16 bytes"; // Exactly 16 bytes - let result = T::from_bytes_padded(data); - assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); - - let data = b"This is way more than 15 bytes!"; - let result = T::from_bytes_padded(data); - assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); - - let text = "This is way more than 15 bytes!"; - let result = T::from_string_padded(text); - assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); - } - - fn test_padding_correctness() { - // Test empty data (should pad with 16 bytes of value 16) - let value = T::from_bytes_padded(b"").unwrap(); - let bytes = value.to_lizard().unwrap(); - assert_eq!([16u8; 16], bytes); - - // Test 1 byte (should pad with 15 bytes of value 15) - let value = T::from_bytes_padded(b"X").unwrap(); - let bytes = value.to_lizard().unwrap(); - assert_eq!(b'X', bytes[0]); - for byte in bytes.iter().skip(1) { - assert_eq!(15, *byte); - } - - // Test 15 bytes (should pad with 1 byte of value 1) - let data = b"123456789012345"; - let value = T::from_bytes_padded(data).unwrap(); - let bytes = value.to_lizard().unwrap(); - assert_eq!(data, &bytes[..15]); - assert_eq!(1, bytes[15]); - } - - fn test_invalid_padding_decode() { - // Create a value with invalid padding (padding byte = 0) - let invalid_block = [0u8; 16]; - let value = T::from_lizard(&invalid_block); - let result = value.to_bytes_padded(); - assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidData); - - // Create a value with inconsistent padding - let mut inconsistent_block = [5u8; 16]; - inconsistent_block[15] = 6; // Wrong padding byte - let value = T::from_lizard(&inconsistent_block); - let result = value.to_bytes_padded(); - 
assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidData); - - // Create a value with padding byte > 16 - let mut invalid_block = [17u8; 16]; - invalid_block[0] = b'X'; // Some data - let value = T::from_lizard(&invalid_block); - let result = value.to_bytes_padded(); - assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidData); - } - - fn test_roundtrip_all_sizes() { - // Test roundtrip for all possible data sizes (0-15 bytes) - for size in 0..=15 { - let data = vec![b'X'; size]; - let value = T::from_bytes_padded(&data).unwrap(); - let decoded = value.to_bytes_padded().unwrap(); - assert_eq!(data, decoded, "Failed for size {}", size); - } - } - - // Pseudonym tests - - #[test] - fn pseudonym_from_bytes_padded() { - test_from_bytes_padded_roundtrip::(); - } - - #[test] - fn pseudonym_from_string_padded() { - test_from_string_padded_roundtrip::(); - } - - #[test] - fn pseudonym_too_long() { - test_too_long::(); - } - - #[test] - fn pseudonym_padding_correctness() { - test_padding_correctness::(); - } - - #[test] - fn pseudonym_invalid_padding_decode() { - test_invalid_padding_decode::(); - } - - #[test] - fn pseudonym_roundtrip_all_sizes() { - test_roundtrip_all_sizes::(); - } - - // Attribute tests - - #[test] - fn attribute_from_bytes_padded() { - test_from_bytes_padded_roundtrip::(); - } - - #[test] - fn attribute_from_string_padded() { - test_from_string_padded_roundtrip::(); - } - - #[test] - fn attribute_too_long() { - test_too_long::(); - } - - #[test] - fn attribute_padding_correctness() { - test_padding_correctness::(); - } - - #[test] - fn attribute_invalid_padding_decode() { - test_invalid_padding_decode::(); - } - - #[test] - fn attribute_roundtrip_all_sizes() { - test_roundtrip_all_sizes::(); - } - - // Attribute-specific tests (Unicode handling) - - #[test] - fn attribute_unicode() { - let test_cases = [ - "café", // 5 bytes (é is 2 bytes) - "你好", // 6 bytes (each Chinese char is 3 bytes) - "🎉", // 4 bytes (emoji) - ]; - - for text in test_cases { - let attr = Attribute::from_string_padded(text).unwrap(); - let decoded = attr.to_string_padded().unwrap(); - assert_eq!(text, decoded.as_str(), "Failed for input: {:?}", text); - } - } - - #[test] - fn attribute_unicode_too_long() { - // A string that looks short but is > 16 bytes in UTF-8 - let text = "你好世界!"; // 15 bytes (5 chars × 3 bytes each) - let result = Attribute::from_string_padded(text); - assert!(result.is_ok()); // Should fit - - let text = "你好世界!!"; // 18 bytes (6 chars × 3 bytes each) - let result = Attribute::from_string_padded(text); - assert!(result.is_err()); // Should not fit - assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); - } -} From ede4d0efc1d300024b90da780888d54af6ee9353 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 14:09:09 +0100 Subject: [PATCH 08/11] Use zero padding --- src/lib/data/json/data.rs | 5 +- src/lib/data/long.rs | 155 ++++++------- src/lib/data/padding/external.rs | 135 ++++++++++++ src/lib/data/padding/internal.rs | 366 +++++++++++++++++++++++++++++++ src/lib/data/padding/mod.rs | 16 ++ 5 files changed, 586 insertions(+), 91 deletions(-) create mode 100644 src/lib/data/padding/external.rs create mode 100644 src/lib/data/padding/internal.rs create mode 100644 src/lib/data/padding/mod.rs diff --git a/src/lib/data/json/data.rs b/src/lib/data/json/data.rs index c619110..631879f 100644 --- a/src/lib/data/json/data.rs +++ b/src/lib/data/json/data.rs @@ -239,9 +239,8 @@ impl PEPJSONValue { /// 
when encrypted. This is necessary for batch transcryption where all encrypted values must /// have identical structure to prevent linkability. /// - /// The external padding blocks are special marker blocks that contain a magic byte - /// pattern (for example `[0xFF, 0xEE, 0xDD, 0xCC, ...]`) and no user data. These - /// marker blocks are automatically detected as padding and stripped during decoding, + /// The external padding blocks are all-zero blocks `[0x00, 0x00, ...]` that contain no user data. + /// These padding blocks are automatically detected and removed during decoding, /// ensuring the original values are perfectly preserved. /// /// # Parameters diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index eead13a..688df9a 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -182,11 +182,10 @@ impl LongPseudonym { /// /// ## How it Works /// - /// - Appends one or more **external padding blocks** after the data blocks - /// using a magic marker pattern: - /// `[0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes u64 LE), 0x00, 0x00, 0x00, 0x00]` + /// - Appends one or more **all-zero external padding blocks** after the data blocks: + /// `[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]` /// - These blocks are **separate from** the internal PKCS#7 padding within blocks - /// - External padding is automatically detected and removed during decoding + /// - During decoding, scans backwards removing all-zero blocks until finding the data /// - The original pseudonym value is perfectly preserved /// /// ## Parameters @@ -239,8 +238,8 @@ impl LongPseudonym { return Ok(self.clone()); } - // Create external padding blocks with metadata about the original block count - let padding_pattern = create_external_padding_block(current_blocks); + // Create external padding blocks (all zeros) + let padding_pattern = create_external_padding_block(); let padding_block = Pseudonym::from_lizard(&padding_pattern); let mut blocks = self.0.clone(); @@ -329,9 +328,9 @@ impl LongAttribute { /// Pads this `LongAttribute` to a target number of blocks. /// /// This is useful for batch operations where all attributes must have the same structure. - /// Any additional padding blocks use a special magic-marker format - /// `[0xFF, 0xEE, 0xDD, 0xCC, original_count (8 bytes u64 LE), 0x00, 0x00, 0x00, 0x00]` - /// and are automatically detected and skipped during decoding. + /// Additional padding blocks are all-zero blocks: + /// `[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]` + /// which are automatically detected and removed during decoding. /// /// # Parameters /// @@ -375,8 +374,8 @@ impl LongAttribute { return Ok(self.clone()); } - // Create external padding blocks with metadata about the original block count - let padding_pattern = create_external_padding_block(current_blocks); + // Create external padding blocks (all zeros) + let padding_pattern = create_external_padding_block(); let padding_block = Attribute::from_lizard(&padding_pattern); let mut blocks = self.0.clone(); @@ -1025,11 +1024,11 @@ fn from_bytes_padded_impl(data: &[u8]) -> Vec { /// Internal helper function to decode padded bytes. /// -/// This function automatically detects and skips external padding blocks +/// This function automatically detects and removes external padding blocks /// created by `pad_to()`, ensuring that normalized values decode correctly. 
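The layout that these comments describe can be illustrated with a standalone sketch that operates on plain `[u8; 16]` blocks rather than `Pseudonym`/`Attribute` values (which carry the blocks via `from_lizard`/`to_lizard` in the actual code). The helper names below (`pkcs7_encode`, `pad_to_blocks`, `decode`) are illustrative only and padding validation is omitted; this is a sketch of the mechanism, not the crate's API.

```rust
// Illustrative sketch: PKCS#7 is applied first, then all-zero external
// padding blocks are appended; decoding removes them in reverse order.

fn pkcs7_encode(data: &[u8]) -> Vec<[u8; 16]> {
    let n_blocks = data.len() / 16 + 1; // always at least one padding byte
    let pad = (n_blocks * 16 - data.len()) as u8; // 0x01..=0x10
    let mut blocks = vec![[pad; 16]; n_blocks];
    for (i, b) in data.iter().enumerate() {
        blocks[i / 16][i % 16] = *b;
    }
    blocks
}

fn pad_to_blocks(mut blocks: Vec<[u8; 16]>, target: usize) -> Vec<[u8; 16]> {
    while blocks.len() < target {
        blocks.push([0u8; 16]); // external padding block: all zeros
    }
    blocks
}

fn decode(blocks: &[[u8; 16]]) -> Vec<u8> {
    // 1. Scan backwards, dropping all-zero external padding blocks.
    let mut last = blocks.len() - 1;
    while last > 0 && blocks[last].iter().all(|&b| b == 0) {
        last -= 1;
    }
    // 2. Strip the PKCS#7 tail from the last data block (validation omitted).
    let mut bytes: Vec<u8> = blocks[..=last].concat();
    let pad = *bytes.last().expect("at least one block") as usize;
    bytes.truncate(bytes.len() - pad);
    bytes
}

fn main() {
    let blocks = pad_to_blocks(pkcs7_encode(b"hi"), 3);
    assert_eq!(blocks.len(), 3);
    assert_eq!(decode(&blocks), b"hi".to_vec());
}
```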
/// -/// External padding uses a magic marker `[0xFF, 0xEE, 0xDD, 0xCC, ...]` which is -/// impossible in valid PKCS#7 padding (valid padding bytes are 0x01-0x10). +/// External padding uses all-zero blocks `[0x00, ...]` which are impossible +/// in valid PKCS#7 padding (valid padding bytes are 0x01-0x10). fn to_bytes_padded_impl(items: &[T]) -> Result, Error> { if items.is_empty() { return Err(Error::new( @@ -1038,31 +1037,27 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E )); } - // Check if there are external padding blocks at the end - // If so, extract the original block count from them - let last_data_block_idx = if !items.is_empty() { - let last_block = items[items.len() - 1].to_lizard().ok_or(Error::new( + // Scan backwards from the end to remove external padding blocks (all-zero blocks) + // Stop when we find a non-padding block (which will have PKCS#7 padding) + if items.is_empty() { + return Err(Error::new(ErrorKind::InvalidData, "Empty data")); + } + + let mut last_data_block_idx = items.len() - 1; + while last_data_block_idx > 0 { + let block = items[last_data_block_idx].to_lizard().ok_or(Error::new( ErrorKind::InvalidData, "Encryptable conversion to bytes failed", ))?; - if let Some(original_count) = is_external_padding_block(&last_block) { - // We have external padding, use the encoded original block count - // Check for underflow: original_count must be at least 1 - if original_count == 0 || original_count > items.len() { - return Err(Error::new( - ErrorKind::InvalidData, - "External padding contains invalid block count", - )); - } - original_count - 1 // -1 because we use it as an index + if is_external_padding_block(&block) { + // This is external padding, continue scanning backwards + last_data_block_idx -= 1; } else { - // No external padding, last block is the last data block - items.len() - 1 + // Found a data block, stop scanning + break; } - } else { - return Err(Error::new(ErrorKind::InvalidData, "Empty data")); - }; + } let mut result = Vec::with_capacity((last_data_block_idx + 1) * 16); @@ -1623,9 +1618,8 @@ mod tests { #[test] fn long_attribute_data_containing_magic_marker_multiblock() { - // Edge case: Data that contains the magic marker bytes [0xFF, 0xEE, 0xDD, 0xCC] - // CAN be encoded if it spans multiple blocks (>16 bytes). - // The magic marker check only applies to the last block. + // Edge case: Data that contains bytes [0xFF, 0xEE, 0xDD, 0xCC] + // CAN be encoded regardless - external padding is all zeros now. let data = vec![ 0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00, 0xAA, 0xBB, 0xCC, @@ -1634,100 +1628,90 @@ mod tests { let decoded = attr.to_bytes_padded().unwrap(); assert_eq!( data, decoded, - "Multi-block data containing magic marker should roundtrip correctly" + "Multi-block data with any bytes should roundtrip correctly" ); } #[test] fn long_attribute_single_block_with_magic_marker() { - // Edge case: Data starting with magic marker CAN now be encoded - // because we check that the last 4 bytes are zeros. - // - // After PKCS#7: [0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08] - // Last 4 bytes are [0x08, 0x08, 0x08, 0x08], not [0x00, 0x00, 0x00, 0x00] - // So this is NOT detected as external padding and roundtrips correctly. + // Edge case: Data starting with [0xFF, 0xEE, 0xDD, 0xCC] works fine. + // After PKCS#7, the last byte will be 0x08 (padding), not 0x00. + // External padding is all zeros, so this won't be confused. 
let data = vec![0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66]; let attr = LongAttribute::from_bytes_padded(&data); let decoded = attr.to_bytes_padded().unwrap(); assert_eq!( data, decoded, - "Single-block data starting with magic marker should roundtrip correctly" + "Single-block data with any bytes should roundtrip correctly" ); } #[test] fn long_attribute_data_exactly_matching_external_padding_pattern() { - // Edge case: Data that exactly matches our external padding block format. - // After PKCS#7 encoding, this should still work correctly. + // Edge case: Data that is all zeros. + // After PKCS#7 encoding, the last byte will be a padding value (0x01-0x10), not 0x00. let data = vec![ - 0xFF, 0xEE, 0xDD, 0xCC, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, ]; let attr = LongAttribute::from_bytes_padded(&data); - // After PKCS#7: will be 2 blocks: [data][full padding block] - // The last block will be [0x10×16], not the magic marker - assert_eq!(attr.len(), 2); + // After PKCS#7: will be 1 block with last byte = 0x01 + // Not all zeros, so won't be confused with external padding + assert_eq!(attr.len(), 1); let decoded = attr.to_bytes_padded().unwrap(); - assert_eq!( - data, decoded, - "Data matching external padding pattern should roundtrip correctly" - ); + assert_eq!(data, decoded, "All-zero data should roundtrip correctly"); } #[test] - fn long_attribute_double_pad_to_fails() { - // Edge case: Calling pad_to() multiple times should fail to decode - // because the decoder will find nested external padding blocks. + fn long_attribute_double_pad_to_works() { + // With all-zero external padding, calling pad_to() multiple times works correctly. + // The decoder scans backwards removing all zero blocks until it finds the data block. let attr = LongAttribute::from_string_padded("test"); let padded_once = attr.pad_to(2).unwrap(); let padded_twice = padded_once.pad_to(3).unwrap(); - // This should fail because block 1 (external padding with count=1) - // doesn't have valid PKCS#7 padding + // This should succeed - all zero blocks are removed, leaving just the data let result = padded_twice.to_string_padded(); - assert!(result.is_err(), "Double pad_to should fail to decode"); + assert!( + result.is_ok(), + "Double pad_to should succeed with all-zero padding" + ); + assert_eq!(result.unwrap(), "test"); } #[test] fn verify_no_ambiguous_edge_cases() { // Comprehensive verification that ALL data can be encoded without ambiguity - // This test confirms the fix for the edge case where single-block data - // starting with the magic marker [0xFF, 0xEE, 0xDD, 0xCC] previously failed. + // External padding is all-zero blocks, and PKCS#7 ensures the last byte is never 0x00. 
- // Test 1: Single-block data starting with magic marker (previously failed) + // Test 1: Arbitrary data let data1 = vec![0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66]; let attr1 = LongAttribute::from_bytes_padded(&data1); let decoded1 = attr1.to_bytes_padded().unwrap(); - assert_eq!( - data1, decoded1, - "Single-block data with magic marker should work" - ); + assert_eq!(data1, decoded1, "Arbitrary data should work"); - // Test 2: Exact external padding pattern as data (with PKCS#7, last block differs) + // Test 2: All-zero data (PKCS#7 adds non-zero padding) let data2 = vec![ - 0xFF, 0xEE, 0xDD, 0xCC, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, ]; let attr2 = LongAttribute::from_bytes_padded(&data2); let decoded2 = attr2.to_bytes_padded().unwrap(); - assert_eq!( - data2, decoded2, - "Data matching external padding pattern should work" - ); + assert_eq!(data2, decoded2, "All-zero data should work"); - // Test 3: Magic marker in middle of multi-block data + // Test 3: Mixed zeros and non-zeros let data3 = vec![ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xFF, 0xEE, 0xDD, 0xCC, 0x11, 0x12, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, ]; let attr3 = LongAttribute::from_bytes_padded(&data3); let decoded3 = attr3.to_bytes_padded().unwrap(); - assert_eq!(data3, decoded3, "Magic marker in middle should work"); + assert_eq!(data3, decoded3, "Mixed data should work"); - // Test 4: pad_to() still works correctly + // Test 4: pad_to() works correctly let attr4 = LongAttribute::from_string_padded("hello"); let padded = attr4.pad_to(3).unwrap(); assert_eq!( @@ -1738,19 +1722,14 @@ mod tests { let decoded4 = padded.to_string_padded().unwrap(); assert_eq!("hello", decoded4, "pad_to should preserve original data"); - // Test 5: Various lengths starting with magic marker + // Test 5: Various lengths with different byte patterns for len in 1..=32 { - let mut data = vec![0xFF, 0xEE, 0xDD, 0xCC]; - data.extend(vec![0xAB; len]); + let mut data = vec![0x00; len]; // All zeros + data[0] = 0xFF; // Make first byte non-zero let attr = LongAttribute::from_bytes_padded(&data); let decoded = attr.to_bytes_padded().unwrap(); - assert_eq!( - data, - decoded, - "Data of length {} starting with magic marker should work", - len + 4 - ); + assert_eq!(data, decoded, "Data of length {} should work", len); } } } diff --git a/src/lib/data/padding/external.rs b/src/lib/data/padding/external.rs new file mode 100644 index 0000000..c37b8f6 --- /dev/null +++ b/src/lib/data/padding/external.rs @@ -0,0 +1,135 @@ +//! External padding for batch unlinkability. +//! +//! This module provides functions to create and detect external padding blocks used by the +//! `pad_to()` method on long data types for batch unlinkability. +//! +//! # Purpose +//! +//! External padding normalizes different-sized values to identical structure for unlinkable batch transcryption. +//! +//! # When Used +//! +//! - Explicitly via the `pad_to(n)` method on long types +//! - Only for multi-block data (see [`long`](crate::data::long) module) +//! - Required when batch processing needs unlinkability guarantees +//! +//! # How It Works +//! +//! - Adds full 16-byte all-zero blocks after the data +//! - Format: `[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]` +//! 
- During decoding, scans backwards from the end removing all-zero blocks until a data block is found +//! - Automatically detected and removed during decoding +//! +//! # Order of Operations +//! +//! **Encoding:** +//! 1. PKCS#7 padding is applied first to the last data block +//! 2. All-zero external padding blocks are added after +//! +//! **Decoding:** +//! 1. Scan backwards from the end, removing all-zero blocks +//! 2. Stop when a non-zero block is found (the last data block with PKCS#7 padding) +//! 3. Remove PKCS#7 padding from the last data block +//! +//! This ordering ensures that even if data is all zeros, PKCS#7 padding will change the last +//! byte to `0x01`-`0x10`, guaranteeing it won't be detected as an external padding block. +//! +//! # Example +//! +//! ```text +//! After pad_to(3): +//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] ← data with PKCS#7 +//! Block 2: [0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00] ← padding +//! Block 3: [0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00] ← padding +//! ``` +//! +//! # Disambiguation Guarantee +//! +//! External padding blocks are **all zeros**, while PKCS#7 padded blocks **never** have `0x00` +//! in the last byte (valid PKCS#7 padding: `0x01`-`0x10`). +//! +//! Because PKCS#7 padding is applied first during encoding, it **always** changes the last +//! byte of any data block to `0x01`-`0x10`. This deterministically prevents any data from being +//! mistaken for an external padding block. +//! +//! This means **ALL possible byte sequences can be encoded without ambiguity**, including values +//! that are all zeros (PKCS#7 changes the last byte) + +/// Creates an external padding block. +/// +/// Format: [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] +/// +/// All-zero blocks are impossible for PKCS#7 padded data blocks (valid padding bytes are 0x01-0x10), +/// making this unambiguous. +pub(crate) fn create_external_padding_block() -> [u8; 16] { + [0u8; 16] +} + +/// Checks if a block is an external padding block. +/// +/// Returns `true` if this block is all zeros (external padding), +/// or `false` if this is a regular data block. 
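The disambiguation guarantee can be checked directly: any PKCS#7-padded block ends in a byte between `0x01` and `0x10`, so it can never be all zeros. The sketch below re-implements the all-zero check and a minimal single-block padder as free functions purely for illustration; they are not the crate's public API.

```rust
// Property check for the disambiguation argument: a PKCS#7-padded block is
// never all zeros, because its final byte is always in 0x01..=0x10.

fn is_external_padding_block(block: &[u8]) -> bool {
    block.len() == 16 && block.iter().all(|&b| b == 0)
}

fn pkcs7_pad_block(data: &[u8]) -> [u8; 16] {
    assert!(data.len() <= 15);
    let pad = (16 - data.len()) as u8;
    let mut block = [pad; 16];
    block[..data.len()].copy_from_slice(data);
    block
}

fn main() {
    // Even all-zero payloads are distinguishable from external padding,
    // because the PKCS#7 tail byte is non-zero.
    for len in 0..=15 {
        let block = pkcs7_pad_block(&vec![0u8; len]);
        assert!(!is_external_padding_block(&block));
        assert!(block[15] >= 0x01 && block[15] <= 0x10);
    }
    // An actual external padding block is recognized as such.
    assert!(is_external_padding_block(&[0u8; 16]));
}
```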
+/// +/// # Disambiguation Guarantee +/// +/// - External padding blocks are **all zeros** +/// - PKCS#7 padded data blocks **never** have `0x00` in the last byte (valid padding: `0x01`-`0x10`) +/// +/// This means **ALL possible byte sequences can be encoded without ambiguity**, including: +/// - Data blocks that are all zeros except the last byte (PKCS#7 will set last byte to 0x01-0x10) +/// - Any combination of bytes whatsoever +pub(crate) fn is_external_padding_block(block: &[u8]) -> bool { + if block.len() != 16 { + return false; + } + + // Check if the entire block is all zeros + // This guarantees disambiguation from data blocks because PKCS#7 padding + // always changes the last byte to 0x01-0x10, never 0x00 + block.iter().all(|&b| b == 0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_external_padding_block() { + let block = create_external_padding_block(); + + // Check that the entire block is all zeros + assert_eq!(&block, &[0u8; 16]); + } + + #[test] + fn test_is_external_padding_block_valid() { + let block = create_external_padding_block(); + assert!(is_external_padding_block(&block)); + } + + #[test] + fn test_is_external_padding_block_invalid_length() { + let block = [0x00; 4]; + assert!(!is_external_padding_block(&block)); + } + + #[test] + fn test_is_external_padding_block_not_all_zeros() { + let mut block = [0x00; 16]; + block[0] = 0xFF; // Not all zeros + assert!(!is_external_padding_block(&block)); + } + + #[test] + fn test_disambiguation_pkcs7_never_all_zeros() { + // PKCS#7 valid padding bytes are 0x01-0x10 + // External padding is all zeros + // This guarantees no ambiguity + + for padding_value in 0x01..=0x10u8 { + let pkcs7_block = [padding_value; 16]; + // PKCS#7 block is not all zeros + assert!(!is_external_padding_block(&pkcs7_block)); + } + } +} diff --git a/src/lib/data/padding/internal.rs b/src/lib/data/padding/internal.rs new file mode 100644 index 0000000..9a3f1f1 --- /dev/null +++ b/src/lib/data/padding/internal.rs @@ -0,0 +1,366 @@ +//! Internal PKCS#7 padding for single-block (16 byte) encoding. +//! +//! This module provides the [`Padded`] trait for encoding data up to 15 bytes using PKCS#7 padding. +//! +//! # Purpose +//! +//! PKCS#7 padding ensures data fills complete 16-byte blocks during encryption. +//! +//! # When Used +//! +//! - Automatically applied during encoding/decoding +//! - For single-block data (up to 15 bytes) via the [`Padded`] trait +//! - For multi-block data within the last block (see [`long`](crate::data::long) module) +//! +//! # How It Works +//! +//! - The padding byte value indicates the number of padding bytes +//! - Valid padding bytes are `0x01`-`0x10` +//! - Always applied, even if data is exactly a multiple of 16 bytes +//! +//! # Example +//! +//! ```text +//! "hello" (5 bytes): +//! [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] +//! └─ data ─┘ └──────────────── 11 padding bytes ────────────────────┘ +//! ``` +//! +//! # Order of Operations +//! +//! PKCS#7 padding is always applied **before** external padding (see [`external`](crate::data::padding::external)) +//! and removed **after** external padding is removed during decoding. This ordering is critical for +//! disambiguation - even if data matches the external padding pattern, PKCS#7 will change the last +//! byte to `0x01`-`0x10`, guaranteeing correct detection. +//! +//! # Disambiguation Guarantee +//! +//! PKCS#7 padding uses bytes `0x01`-`0x10`, so the last byte is **never** `0x00`. +//! 
This makes it completely unambiguous from external padding blocks, which are **all zeros**. +//! +//! This means **ALL possible byte sequences can be encoded without ambiguity**. + +use crate::data::simple::{Attribute, ElGamalEncryptable, Pseudonym}; +use std::io::{Error, ErrorKind}; + +/// A trait for encryptable types that support PKCS#7 padding for single-block (16 byte) encoding. +/// +/// This trait provides methods to encode data up to 15 bytes using PKCS#7 padding, +/// which fills the remaining bytes of a 16-byte block with padding bytes. +/// +/// # Padding Format +/// +/// - For `n` bytes of data (0 ≤ n ≤ 15), add `16 - n` padding bytes +/// - Each padding byte has the value `16 - n` +/// - This allows unambiguous removal of padding during decoding +/// +/// # Examples +/// +/// ```ignore +/// use libpep::data::padding::Padded; +/// use libpep::data::simple::Attribute; +/// +/// // Encode a string +/// let attr = Attribute::from_string_padded("hello")?; +/// let decoded = attr.to_string_padded()?; +/// assert_eq!(decoded, "hello"); +/// +/// // Encode bytes +/// let attr = Attribute::from_bytes_padded(b"data")?; +/// let decoded = attr.to_bytes_padded()?; +/// assert_eq!(decoded, b"data"); +/// ``` +pub trait Padded: ElGamalEncryptable { + /// Encodes an arbitrary byte array using PKCS#7 padding. + /// + /// # Parameters + /// + /// - `data`: The bytes to encode (must be at most 15 bytes) + /// + /// # Errors + /// + /// Returns an error if the data exceeds 15 bytes. + fn from_bytes_padded(data: &[u8]) -> Result + where + Self: Sized, + { + if data.len() > 15 { + return Err(Error::new( + ErrorKind::InvalidInput, + format!("Data too long: {} bytes (max 15)", data.len()), + )); + } + + // Create padded block using PKCS#7 padding + let padding_byte = (16 - data.len()) as u8; + let mut block = [padding_byte; 16]; + block[..data.len()].copy_from_slice(data); + + Ok(Self::from_lizard(&block)) + } + + /// Encodes a string using PKCS#7 padding. + /// + /// # Parameters + /// + /// - `text`: The string to encode (must be at most 15 bytes when UTF-8 encoded) + /// + /// # Errors + /// + /// Returns an error if the string exceeds 15 bytes. + fn from_string_padded(text: &str) -> Result + where + Self: Sized, + { + Self::from_bytes_padded(text.as_bytes()) + } + + /// Decodes back to the original string. + /// + /// # Errors + /// + /// Returns an error if: + /// - The padding is invalid + /// - The decoded bytes are not valid UTF-8 + /// - The value was not created using `from_bytes_padded` or `from_string_padded` + fn to_string_padded(&self) -> Result { + let bytes = self.to_bytes_padded()?; + String::from_utf8(bytes).map_err(|e| Error::new(ErrorKind::InvalidData, e.to_string())) + } + + /// Decodes back to the original byte array. + /// + /// # Errors + /// + /// Returns an error if: + /// - The padding is invalid + /// - The value was not created using `from_bytes_padded` or `from_string_padded` + fn to_bytes_padded(&self) -> Result, Error> { + let block = self.to_lizard().ok_or(Error::new( + ErrorKind::InvalidData, + "Value is not a valid padded value", + ))?; + + let padding_byte = block[15]; + + if padding_byte == 0 || padding_byte > 16 { + return Err(Error::new(ErrorKind::InvalidData, "Invalid padding")); + } + + if block[16 - padding_byte as usize..] 
+ .iter() + .any(|&b| b != padding_byte) + { + return Err(Error::new(ErrorKind::InvalidData, "Inconsistent padding")); + } + + let data_bytes = 16 - padding_byte as usize; + Ok(block[..data_bytes].to_vec()) + } +} + +impl Padded for Pseudonym {} +impl Padded for Attribute {} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + use std::io::ErrorKind; + + // Generic test helper functions + + fn test_from_bytes_padded_roundtrip() { + let test_cases = [ + b"" as &[u8], + b"a", + b"hello", + b"Hello, world!", + b"123456789012345", // 15 bytes (max) + ]; + + for data in test_cases { + let value = T::from_bytes_padded(data).unwrap(); + let decoded = value.to_bytes_padded().unwrap(); + assert_eq!(data, decoded.as_slice(), "Failed for input: {:?}", data); + } + } + + fn test_from_string_padded_roundtrip() { + let test_cases = ["", "a", "hello", "Hello, world!", "123456789012345"]; + + for text in test_cases { + let value = T::from_string_padded(text).unwrap(); + let decoded = value.to_string_padded().unwrap(); + assert_eq!(text, decoded.as_str(), "Failed for input: {:?}", text); + } + } + + fn test_too_long() { + let data = b"This is 16 bytes"; // Exactly 16 bytes + let result = T::from_bytes_padded(data); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); + + let data = b"This is way more than 15 bytes!"; + let result = T::from_bytes_padded(data); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); + + let text = "This is way more than 15 bytes!"; + let result = T::from_string_padded(text); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); + } + + fn test_padding_correctness() { + // Test empty data (should pad with 16 bytes of value 16) + let value = T::from_bytes_padded(b"").unwrap(); + let bytes = value.to_lizard().unwrap(); + assert_eq!([16u8; 16], bytes); + + // Test 1 byte (should pad with 15 bytes of value 15) + let value = T::from_bytes_padded(b"X").unwrap(); + let bytes = value.to_lizard().unwrap(); + assert_eq!(b'X', bytes[0]); + for byte in bytes.iter().skip(1) { + assert_eq!(15, *byte); + } + + // Test 15 bytes (should pad with 1 byte of value 1) + let data = b"123456789012345"; + let value = T::from_bytes_padded(data).unwrap(); + let bytes = value.to_lizard().unwrap(); + assert_eq!(data, &bytes[..15]); + assert_eq!(1, bytes[15]); + } + + fn test_invalid_padding_decode() { + // Create a value with invalid padding (padding byte = 0) + let invalid_block = [0u8; 16]; + let value = T::from_lizard(&invalid_block); + let result = value.to_bytes_padded(); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidData); + + // Create a value with inconsistent padding + let mut inconsistent_block = [5u8; 16]; + inconsistent_block[15] = 6; // Wrong padding byte + let value = T::from_lizard(&inconsistent_block); + let result = value.to_bytes_padded(); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidData); + + // Create a value with padding byte > 16 + let mut invalid_block = [17u8; 16]; + invalid_block[0] = b'X'; // Some data + let value = T::from_lizard(&invalid_block); + let result = value.to_bytes_padded(); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidData); + } + + fn test_roundtrip_all_sizes() { + // Test roundtrip for all possible data sizes (0-15 bytes) + for size in 0..=15 { + let data = vec![b'X'; size]; 
+ let value = T::from_bytes_padded(&data).unwrap(); + let decoded = value.to_bytes_padded().unwrap(); + assert_eq!(data, decoded, "Failed for size {}", size); + } + } + + // Pseudonym tests + + #[test] + fn pseudonym_from_bytes_padded() { + test_from_bytes_padded_roundtrip::(); + } + + #[test] + fn pseudonym_from_string_padded() { + test_from_string_padded_roundtrip::(); + } + + #[test] + fn pseudonym_too_long() { + test_too_long::(); + } + + #[test] + fn pseudonym_padding_correctness() { + test_padding_correctness::(); + } + + #[test] + fn pseudonym_invalid_padding_decode() { + test_invalid_padding_decode::(); + } + + #[test] + fn pseudonym_roundtrip_all_sizes() { + test_roundtrip_all_sizes::(); + } + + // Attribute tests + + #[test] + fn attribute_from_bytes_padded() { + test_from_bytes_padded_roundtrip::(); + } + + #[test] + fn attribute_from_string_padded() { + test_from_string_padded_roundtrip::(); + } + + #[test] + fn attribute_too_long() { + test_too_long::(); + } + + #[test] + fn attribute_padding_correctness() { + test_padding_correctness::(); + } + + #[test] + fn attribute_invalid_padding_decode() { + test_invalid_padding_decode::(); + } + + #[test] + fn attribute_roundtrip_all_sizes() { + test_roundtrip_all_sizes::(); + } + + // Attribute-specific tests (Unicode handling) + + #[test] + fn attribute_unicode() { + let test_cases = [ + "café", // 5 bytes (é is 2 bytes) + "你好", // 6 bytes (each Chinese char is 3 bytes) + "🎉", // 4 bytes (emoji) + ]; + + for text in test_cases { + let attr = Attribute::from_string_padded(text).unwrap(); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(text, decoded.as_str(), "Failed for input: {:?}", text); + } + } + + #[test] + fn attribute_unicode_too_long() { + // A string that looks short but is > 16 bytes in UTF-8 + let text = "你好世界!"; // 15 bytes (5 chars × 3 bytes each) + let result = Attribute::from_string_padded(text); + assert!(result.is_ok()); // Should fit + + let text = "你好世界!!"; // 18 bytes (6 chars × 3 bytes each) + let result = Attribute::from_string_padded(text); + assert!(result.is_err()); // Should not fit + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); + } +} diff --git a/src/lib/data/padding/mod.rs b/src/lib/data/padding/mod.rs new file mode 100644 index 0000000..f5fff63 --- /dev/null +++ b/src/lib/data/padding/mod.rs @@ -0,0 +1,16 @@ +//! Padding mechanisms for PEP data encoding. +//! +//! This module provides two distinct types of padding for PEP data: +//! +//! - **Internal Padding (PKCS#7)**: See the [`internal`] module for PKCS#7 padding used for single-block encoding. +//! - **External Padding**: See the [`external`] module for padding blocks used for batch unlinkability. +//! +//! Both padding types are completely unambiguous and can encode any possible byte sequence. 
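On the internal (PKCS#7) side, the validation rules that the tests above exercise fit in a few lines. `unpad16` below is an illustrative stand-in for the checks performed in `Padded::to_bytes_padded`, not part of the crate.

```rust
// Standalone version of the single-block unpadding checks: the padding byte
// must be 0x01..=0x10 and every padding byte must agree with it.

fn unpad16(block: &[u8; 16]) -> Result<Vec<u8>, &'static str> {
    let pad = block[15] as usize;
    if pad == 0 || pad > 16 {
        return Err("invalid padding");
    }
    if block[16 - pad..].iter().any(|&b| b != block[15]) {
        return Err("inconsistent padding");
    }
    Ok(block[..16 - pad].to_vec())
}

fn main() {
    // Valid: "hi" padded with fourteen 0x0E bytes.
    let mut ok = [0x0Eu8; 16];
    ok[..2].copy_from_slice(b"hi");
    assert_eq!(unpad16(&ok).unwrap(), b"hi".to_vec());

    // Invalid: an all-zero block (padding byte 0) is rejected, which is why
    // external padding blocks can never be mistaken for padded data.
    assert!(unpad16(&[0u8; 16]).is_err());

    // Inconsistent: the padding byte says 6 but the tail bytes are 5s.
    let mut bad = [5u8; 16];
    bad[15] = 6;
    assert!(unpad16(&bad).is_err());
}
```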
+ +pub mod internal; + +#[cfg(feature = "long")] +pub mod external; + +// Re-export the Padded trait for convenience +pub use internal::Padded; From 2ce913ba29cb55a5cffb55fdc3ed16a17634fc21 Mon Sep 17 00:00:00 2001 From: Job Doesburg Date: Fri, 13 Feb 2026 14:25:25 +0100 Subject: [PATCH 09/11] Improvements --- src/lib/data/json/structure.rs | 10 ++++++---- src/lib/data/long.rs | 4 ---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/lib/data/json/structure.rs b/src/lib/data/json/structure.rs index cd8f9f3..2b5d3c8 100644 --- a/src/lib/data/json/structure.rs +++ b/src/lib/data/json/structure.rs @@ -155,9 +155,11 @@ fn unify_two_structures( // Objects: must have same fields, unify field-wise (JSONStructure::Object(fields1), JSONStructure::Object(fields2)) => { - // Convert to HashMaps for easier lookup - let map1: HashMap<_, _> = fields1.iter().map(|(k, v)| (k, v)).collect(); - let map2: HashMap<_, _> = fields2.iter().map(|(k, v)| (k, v)).collect(); + // Convert to HashMaps for easier lookup (using owned String keys) + let map1: HashMap = + fields1.iter().map(|(k, v)| (k.clone(), v)).collect(); + let map2: HashMap = + fields2.iter().map(|(k, v)| (k.clone(), v)).collect(); // Check that both objects have the same set of keys if map1.len() != map2.len() { @@ -169,7 +171,7 @@ fn unify_two_structures( match map2.get(key) { Some(val2) => { let unified_val = unify_two_structures(val1, val2)?; - unified_fields.push(((*key).clone(), unified_val)); + unified_fields.push((key.clone(), unified_val)); } None => return Err(UnifyError::ObjectFieldMismatch), } diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index 688df9a..cfa3321 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -1039,10 +1039,6 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E // Scan backwards from the end to remove external padding blocks (all-zero blocks) // Stop when we find a non-padding block (which will have PKCS#7 padding) - if items.is_empty() { - return Err(Error::new(ErrorKind::InvalidData, "Empty data")); - } - let mut last_data_block_idx = items.len() - 1; while last_data_block_idx > 0 { let block = items[last_data_block_idx].to_lizard().ok_or(Error::new( From b497e47778e0d1cc381ad9967197b04abd1ffba8 Mon Sep 17 00:00:00 2001 From: Julian van der Horst Date: Tue, 17 Feb 2026 16:02:22 +0100 Subject: [PATCH 10/11] Ran fmt --- src/lib/client/batch.rs | 19 +- src/lib/client/functions.rs | 3 +- src/lib/client/py/batch.rs | 301 ++++++++++++++++++++--------- src/lib/client/py/distributed.rs | 41 +++- src/lib/client/py/functions.rs | 120 +++++++++++- src/lib/client/py/types.rs | 17 +- src/lib/client/types.rs | 19 +- src/lib/client/wasm/distributed.rs | 49 +++++ src/lib/client/wasm/types.rs | 19 ++ src/lib/data/json/data.rs | 24 ++- src/lib/data/long.rs | 19 +- src/lib/data/records.rs | 17 +- src/lib/data/simple.rs | 19 +- src/lib/data/traits.rs | 7 + src/lib/data/wasm/json.rs | 56 +++++- src/lib/transcryptor/batch.rs | 5 + tests/python/test_json.py | 77 ++++++++ tests/wasm/json.test.js | 64 +++++- 18 files changed, 761 insertions(+), 115 deletions(-) diff --git a/src/lib/client/batch.rs b/src/lib/client/batch.rs index c523cd2..478a244 100644 --- a/src/lib/client/batch.rs +++ b/src/lib/client/batch.rs @@ -1,6 +1,6 @@ //! Batch operations for encryption and decryption. 
-use crate::data::traits::{Encryptable, Encrypted}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted}; use crate::transcryptor::batch::BatchError; use rand_core::{CryptoRng, Rng}; @@ -17,6 +17,23 @@ pub fn encrypt_batch( public_key: &M::PublicKeyType, rng: &mut R, ) -> Result, BatchError> +where + M: BatchEncryptable, + R: Rng + CryptoRng, +{ + let preprocessed = M::preprocess_batch(messages)?; + Ok(preprocessed + .iter() + .map(|x| x.encrypt(public_key, rng)) + .collect()) +} + +#[cfg(feature = "insecure")] +pub fn encrypt_batch_raw( + messages: &[M], + public_key: &M::PublicKeyType, + rng: &mut R, +) -> Result, BatchError> where M: Encryptable, R: Rng + CryptoRng, diff --git a/src/lib/client/functions.rs b/src/lib/client/functions.rs index 1dde3bb..e100811 100644 --- a/src/lib/client/functions.rs +++ b/src/lib/client/functions.rs @@ -40,7 +40,8 @@ where /// # Examples /// ```rust,ignore /// let pseudonym = decrypt(&encrypted_pseudonym, &pseudonym_key); -/// let attribute = decrypt(&encrypted_attribute, &attribute_key); +/// let attribute = decrypt(&encrypted_attribute, +/// &attribute_key); /// ``` #[cfg(not(feature = "elgamal3"))] pub fn decrypt(encrypted: &E, secret_key: &E::SecretKeyType) -> E::UnencryptedType diff --git a/src/lib/client/py/batch.rs b/src/lib/client/py/batch.rs index 025037f..5ab7231 100644 --- a/src/lib/client/py/batch.rs +++ b/src/lib/client/py/batch.rs @@ -1,6 +1,6 @@ //! Python bindings for batch transcryption operations. -use crate::client::{decrypt, encrypt}; +use crate::client::{decrypt_batch, encrypt_batch}; use crate::data::py::records::PyEncryptedRecord; #[cfg(feature = "long")] use crate::data::py::records::PyLongEncryptedRecord; @@ -15,7 +15,7 @@ use crate::factors::TranscryptionInfo; use crate::factors::{AttributeRekeyInfo, PseudonymizationInfo}; use crate::keys::py::types::{ PyAttributeSessionPublicKey, PyAttributeSessionSecretKey, PyPseudonymSessionPublicKey, - PyPseudonymSessionSecretKey, + PyPseudonymSessionSecretKey, PyGlobalPublicKeys, AttributeSessionKeys, PseudonymSessionKeys, }; use crate::keys::types::{ AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, @@ -49,7 +49,8 @@ use crate::data::py::long::{ use crate::data::records::LongEncryptedRecord; #[cfg(feature = "json")] -use crate::data::py::json::PyEncryptedPEPJSONValue; +use crate::data::py::json::{PyPEPJSONValue, PyEncryptedPEPJSONValue}; +use crate::keys::{GlobalPublicKeys, PseudonymSessionKeys, SessionKeys}; /// Polymorphic batch pseudonymization of a list of encrypted pseudonyms. /// Works with both EncryptedPseudonym and LongEncryptedPseudonym. 
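The control flow of the generic `encrypt_batch` introduced above (preprocess the whole batch, then encrypt element-wise) can be sketched without the PEP types. Everything in the example is a simplified stand-in: the trait mirrors `BatchEncryptable` only in shape, `Msg` is a toy message type, and the XOR step is a placeholder for ElGamal encryption with a session or global public key.

```rust
// Sketch of the batch-encryption flow: normalize the batch as a whole
// (e.g. pad all items to a common size), then encrypt each item.

trait BatchEncryptable: Sized {
    /// Normalize a batch so that all items share the same structure.
    fn preprocess_batch(items: &[Self]) -> Result<Vec<Self>, String>;
    fn encrypt(&self, key: &u64) -> Vec<u8>;
}

fn encrypt_batch<M: BatchEncryptable>(
    messages: &[M],
    public_key: &u64,
) -> Result<Vec<Vec<u8>>, String> {
    let preprocessed = M::preprocess_batch(messages)?;
    Ok(preprocessed.iter().map(|m| m.encrypt(public_key)).collect())
}

struct Msg(Vec<u8>);

impl BatchEncryptable for Msg {
    fn preprocess_batch(items: &[Self]) -> Result<Vec<Self>, String> {
        // Pad every message to the longest length in the batch; a stand-in
        // for pad_to / structure unification in the real implementation.
        let max = items.iter().map(|m| m.0.len()).max().unwrap_or(0);
        Ok(items
            .iter()
            .map(|m| {
                let mut v = m.0.clone();
                v.resize(max, 0);
                Msg(v)
            })
            .collect())
    }

    fn encrypt(&self, key: &u64) -> Vec<u8> {
        // Placeholder cipher (XOR with one key byte), only to keep the
        // sketch self-contained and runnable.
        self.0.iter().map(|b| b ^ (*key as u8)).collect()
    }
}

fn main() {
    let msgs = vec![Msg(b"hi".to_vec()), Msg(b"hello".to_vec())];
    let ciphertexts = encrypt_batch(&msgs, &42).unwrap();
    // After preprocessing, both ciphertexts have the same length, so batch
    // elements cannot be told apart by size alone.
    assert_eq!(ciphertexts[0].len(), ciphertexts[1].len());
}
```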
@@ -285,53 +286,87 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P let py = messages.py(); let mut rng = rand::rng(); - // Try Vec with PseudonymSessionPublicKey + // Try Vec with PyPseudonymSessionPublicKey if let Ok(ps) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = PseudonymSessionPublicKey::from(pk.0 .0); - let result: Vec = ps - .into_iter() - .map(|p| PyEncryptedPseudonym(encrypt(&p.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = ps.into_iter().map(|p| p.0).collect(); + + // True Batch: Shuffles and encrypts + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = result.into_iter().map(PyEncryptedPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionPublicKey + // Try Vec with PyAttributeSessionPublicKey if let Ok(attrs) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = AttributeSessionPublicKey::from(pk.0 .0); - let result: Vec = attrs - .into_iter() - .map(|a| PyEncryptedAttribute(encrypt(&a.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = attrs.into_iter().map(|a| a.0).collect(); + + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = result.into_iter().map(PyEncryptedAttribute).collect(); + return py_result.into_py_any(py); } } - // Try Vec with PseudonymSessionPublicKey + // Try Vec #[cfg(feature = "long")] if let Ok(lps) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = PseudonymSessionPublicKey::from(pk.0 .0); - let result: Vec = lps - .into_iter() - .map(|p| PyLongEncryptedPseudonym(encrypt(&p.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = lps.into_iter().map(|p| p.0).collect(); + + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = result.into_iter().map(PyLongEncryptedPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionPublicKey + // Try Vec #[cfg(feature = "long")] if let Ok(las) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = AttributeSessionPublicKey::from(pk.0 .0); - let result: Vec = las - .into_iter() - .map(|a| PyLongEncryptedAttribute(encrypt(&a.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = las.into_iter().map(|a| a.0).collect(); + + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = result.into_iter().map(PyLongEncryptedAttribute).collect(); + return py_result.into_py_any(py); + } + } + + // Try Vec with PySessionKeys + #[cfg(feature = "json")] + if let Ok(jsons) = messages.extract::>() { + if let Ok(pk) = public_key.extract::() { + let keys = SessionKeys { + pseudonym: PseudonymSessionKeys { + public: PseudonymGlobalPublicKey(*pk.pseudonym.0), + secret: None, + }, + attribute: AttributeSessionKeys { + public: AttributeGlobalPublicKey(*pk.attribute.0), + secret: None, + }, + }; + let rust_msgs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + + // True Batch: Calculates unified padding for JSON structures + let result = encrypt_batch(&rust_msgs, &keys, &mut rng) + 
.map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + return py_result.into_py_any(py); } } @@ -339,7 +374,6 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P "encrypt_batch() requires (Vec[unencrypted_type], matching_public_key)", )) } - /// Polymorphic batch decryption. /// Decrypts a list of encrypted messages with a session secret key. #[cfg(feature = "elgamal3")] @@ -351,69 +385,78 @@ pub fn py_decrypt_batch( ) -> PyResult> { let py = encrypted.py(); - // Try Vec with PseudonymSessionSecretKey + // Try Vec with PyPseudonymSessionSecretKey if let Ok(eps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = eps - .into_iter() - .map(|ep| { - decrypt(&ep.0, &key) - .map(PyPseudonym) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = eps.into_iter().map(|e| e.0).collect(); + + // True Batch Decryption + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec with PyAttributeSessionSecretKey if let Ok(eas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = eas - .into_iter() - .map(|ea| { - decrypt(&ea.0, &key) - .map(PyAttribute) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = eas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyAttribute).collect(); + return py_result.into_py_any(py); } } - // Try Vec with PseudonymSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = leps - .into_iter() - .map(|lep| { - decrypt(&lep.0, &key) - .map(PyLongPseudonym) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = leps.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyLongPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = leas - .into_iter() - .map(|lea| { - decrypt(&lea.0, &key) - .map(PyLongAttribute) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key) + 
.map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyLongAttribute).collect(); + return py_result.into_py_any(py); + } + } + + // Try Vec with PySessionKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(k) = secret_key.extract::() { + let rust_encs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + let keys = SessionKeys::from(k); + + // True Batch Decryption: handles unpadding of JSON structures + let result = decrypt_batch(&rust_encs, &keys) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); } } @@ -424,62 +467,87 @@ pub fn py_decrypt_batch( /// Polymorphic batch decryption. /// Decrypts a list of encrypted messages with a session secret key. -#[cfg(not(feature = "elgamal3"))] -#[pyfunction] -#[pyo3(name = "decrypt_batch")] pub fn py_decrypt_batch( encrypted: &Bound, secret_key: &Bound, ) -> PyResult> { let py = encrypted.py(); - // Try Vec with PseudonymSessionSecretKey + // Try Vec with PyPseudonymSessionSecretKey if let Ok(eps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec = eps - .into_iter() - .map(|ep| PyPseudonym(decrypt(&ep.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = eps.into_iter().map(|e| e.0).collect(); + + // True Batch Decryption: handles potential reordering/shuffling + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec with PyAttributeSessionSecretKey if let Ok(eas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec = eas - .into_iter() - .map(|ea| PyAttribute(decrypt(&ea.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = eas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyAttribute).collect(); + return py_result.into_py_any(py); } } - // Try Vec with PseudonymSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec = leps - .into_iter() - .map(|lep| PyLongPseudonym(decrypt(&lep.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = leps.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyLongPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec = leas - .into_iter() - .map(|lea| PyLongAttribute(decrypt(&lea.0, &key))) - 
.collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyLongAttribute).collect(); + return py_result.into_py_any(py); + } + } + + // Try Vec with PySessionKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(pk) = secret_key.extract::() { + let keys = GlobalPublicKeys { + pseudonym: PseudonymGlobalPublicKey(*pk.pseudonym.0), + attribute: AttributeGlobalPublicKey(*pk.attribute.0) + }; + let rust_encs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + + // True Batch Decryption: handles automatic unpadding of JSON data + let result = decrypt_batch(&rust_encs, &keys) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); } } @@ -550,6 +618,19 @@ pub fn py_encrypt_global_batch( } } + // Try Vec with GlobalKeys + #[cfg(feature = "json")] + if let Ok(jsons) = messages.extract::>() { + if let Ok(keys) = public_key.extract::() { + let info = TranscryptionInfo::from(keys); + let result: Vec = jsons + .into_iter() + .map(|j| PyEncryptedPEPJSONValue(j.0.encrypt_global(&info, &mut rng))) + .collect(); + return result.into_py_any(py); + } + } + Err(PyTypeError::new_err( "encrypt_global_batch() requires (Vec[unencrypted_type], matching_global_public_key)", )) @@ -632,6 +713,23 @@ pub fn py_decrypt_global_batch( } } + // Try Vec with GlobalKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(keys) = secret_key.extract::() { + let info = TranscryptionInfo::from(keys); + let result: Vec<_> = jsons + .into_iter() + .map(|j| { + j.0.decrypt_global(&info) + .map(PyPEPJSONValue) + .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) + }) + .collect::, _>>()?; + return result.into_py_any(py); + } + } + Err(PyTypeError::new_err( "decrypt_global_batch() requires (Vec[encrypted_type], matching_global_secret_key)", )) @@ -698,6 +796,19 @@ pub fn py_decrypt_global_batch( } } + // Try Vec with GlobalKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(keys) = secret_key.extract::() { + let info = TranscryptionInfo::from(keys); + let result: Vec = jsons + .into_iter() + .map(|j| PyPEPJSONValue(j.0.decrypt_global(&info))) + .collect(); + return Ok(result.into_py_any(py)?); + } + } + Err(PyTypeError::new_err( "decrypt_global_batch() requires (Vec[encrypted_type], matching_global_secret_key)", )) diff --git a/src/lib/client/py/distributed.rs b/src/lib/client/py/distributed.rs index a5ec9ba..527d32f 100644 --- a/src/lib/client/py/distributed.rs +++ b/src/lib/client/py/distributed.rs @@ -370,8 +370,21 @@ impl PyClient { return py_result.into_py_any(py); } + // Try Vec - uses SessionKeys directly + #[cfg(feature = "json")] + if let Ok(las) = messages.extract::>() { + let msgs: Vec<_> = las.into_iter().map(|a| a.0).collect(); + let result = self + .0 + .encrypt_batch(&msgs, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = + result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or 
Vec[LongAttribute]", + "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute], or Vec[PEPJSONValue]", )) } @@ -427,8 +440,19 @@ impl PyClient { return py_result.into_py_any(py); } + // Try Vec - uses SessionKeys directly + #[cfg(feature = "json")] + if let Ok(leas) = encrypted.extract::>() { + let enc: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + let result = self .0 + .decrypt_batch(&enc) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute]", + "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute], or Vec[EncryptedPEPJSONValue]", )) } @@ -484,8 +508,19 @@ impl PyClient { return py_result.into_py_any(py); } + // Try Vec - uses SessionKeys directly + #[cfg(feature = "json")] + if let Ok(leas) = encrypted.extract::>() { + let enc: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + let result = self .0 + .decrypt_batch(&enc) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute]", + "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute], or Vec[EncryptedPEPJSONValue]", )) } diff --git a/src/lib/client/py/functions.rs b/src/lib/client/py/functions.rs index 696603c..8b12c19 100644 --- a/src/lib/client/py/functions.rs +++ b/src/lib/client/py/functions.rs @@ -18,7 +18,7 @@ use crate::data::py::simple::{ PyAttribute, PyEncryptedAttribute, PyEncryptedPseudonym, PyPseudonym, }; #[cfg(feature = "offline")] -use crate::keys::py::types::{PyAttributeGlobalPublicKey, PyPseudonymGlobalPublicKey}; +use crate::keys::py::types::{PyAttributeGlobalPublicKey, PyPseudonymGlobalPublicKey, PyGlobalPublicKeys}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::py::types::{PyAttributeGlobalSecretKey, PyPseudonymGlobalSecretKey}; use crate::keys::py::PySessionKeys; @@ -30,10 +30,7 @@ use crate::keys::py::{ use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::{AttributeGlobalSecretKey, PseudonymGlobalSecretKey}; -use crate::keys::{ - AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, - PseudonymSessionSecretKey, SessionKeys, -}; +use crate::keys::{AttributeSessionPublicKey, AttributeSessionSecretKey, GlobalPublicKeys, PseudonymSessionPublicKey, PseudonymSessionSecretKey, SessionKeys}; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3::types::PyAny; @@ -367,6 +364,19 @@ pub fn py_encrypt_global(message: &Bound, public_key: &Bound) -> P } } + // Try PEPJSONValue with SessionKeys + #[cfg(feature = "json")] + if let Ok(json) = message.extract::() { + if let Ok(pk) = public_key.extract::() { + let keys = GlobalPublicKeys { + pseudonym: PseudonymGlobalPublicKey(*pk.pseudonym.0), + attribute: AttributeGlobalPublicKey(*pk.attribute.0) + }; + let result = 
encrypt_global(&json.0, &keys, &mut rng); + return Ok(Py::new(py, PyEncryptedPEPJSONValue(result))?.into_any()); + } + } + Err(PyTypeError::new_err( "encrypt_global() requires (unencrypted_type, matching_global_public_key)", )) @@ -430,6 +440,18 @@ pub fn py_decrypt_global( } } + // Try EncryptedPEPJSONValue with SessionKeys + #[cfg(feature = "json")] + if let Ok(ej) = encrypted.extract::() { + if let Ok(sk) = secret_key.extract::() { + let keys: SessionKeys = sk.clone().into(); + if let Some(result) = decrypt_global(&ej.0, &keys) { + return Ok(Py::new(py, PyPEPJSONValue(result))?.into_any()); + } + return Err(pyo3::exceptions::PyValueError::new_err("Decryption failed")); + } + } + Err(PyTypeError::new_err( "decrypt_global() requires (encrypted_type, matching_global_secret_key)", )) @@ -484,6 +506,16 @@ pub fn py_decrypt_global( } } + // Try EncryptedPEPJSONValue with SessionKeys + #[cfg(feature = "json")] + if let Ok(ej) = encrypted.extract::() { + if let Ok(sk) = secret_key.extract::() { + let keys: SessionKeys = sk.clone().into(); + let result = decrypt_global(&ej.0, &keys); + return Ok(Py::new(py, PyPEPJSONValue(result))?.into_any()); + } + } + Err(PyTypeError::new_err( "decrypt_global() requires (encrypted_type, matching_global_secret_key)", )) @@ -628,6 +660,32 @@ pub fn py_encrypt_batch( } } + // Try PEPJSONValue + SessionKeys + #[cfg(feature = "json")] + if let Ok(sk) = key.extract::() { + if messages[0].extract::().is_ok() { + let rust_msgs: Vec<_> = messages + .iter() + .map(|m| { + m.extract::() + .expect("type already validated") + .0 + }) + .collect(); + let keys: SessionKeys = sk.clone().into(); + let encrypted = encrypt_batch(&rust_msgs, &keys, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{}", e)))?; + return Ok(encrypted + .into_iter() + .map(|e| { + Py::new(py, PyEncryptedPEPJSONValue(e)) + .expect("PyO3 allocation failed") + .into_any() + }) + .collect()); + } + } + Err(PyTypeError::new_err( "encrypt_batch() requires list of (Pseudonym|Attribute|LongPseudonym|LongAttribute) and matching key", )) @@ -748,6 +806,32 @@ pub fn py_decrypt_batch( } } + // Try EncryptedPEPJSONValue + SessionKeys + #[cfg(feature = "json")] + if let Ok(sk) = key.extract::() { + if encrypted[0].extract::().is_ok() { + let rust_encs: Vec<_> = encrypted + .iter() + .map(|e| { + e.extract::() + .expect("type already validated") + .0 + }) + .collect(); + let keys: SessionKeys = sk.clone().into(); + let decrypted = decrypt_batch(&rust_encs, &keys) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{}", e)))?; + return Ok(decrypted + .into_iter() + .map(|d| { + Py::new(py, PyPEPJSONValue(d)) + .expect("PyO3 allocation failed") + .into_any() + }) + .collect()); + } + } + Err(PyTypeError::new_err( "decrypt_batch() requires list of encrypted types and matching key", )) @@ -868,6 +952,32 @@ pub fn py_decrypt_batch( } } + // Try EncryptedPEPJSONValue + SessionKeys + #[cfg(feature = "json")] + if let Ok(sk) = key.extract::() { + if encrypted[0].extract::().is_ok() { + let rust_encs: Vec<_> = encrypted + .iter() + .map(|e| { + e.extract::() + .expect("type already validated") + .0 + }) + .collect(); + let keys: SessionKeys = sk.clone().into(); + let decrypted = decrypt_batch(&rust_encs, &keys) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{}", e)))?; + return Ok(decrypted + .into_iter() + .map(|d| { + Py::new(py, PyPEPJSONValue(d)) + .expect("PyO3 allocation failed") + .into_any() + }) + .collect()); + } + } + Err(PyTypeError::new_err( "decrypt_batch() 
requires list of encrypted types and matching key", )) diff --git a/src/lib/client/py/types.rs b/src/lib/client/py/types.rs index a4c16d1..42c1275 100644 --- a/src/lib/client/py/types.rs +++ b/src/lib/client/py/types.rs @@ -72,7 +72,7 @@ impl PyOfflineClient { } Err(PyTypeError::new_err( - "encrypt() requires Pseudonym, Attribute, LongPseudonym, or LongAttribute", + "encrypt() requires Pseudonym, Attribute, LongPseudonym, or LongAttribute, or PEPJSONValue", )) } @@ -133,8 +133,21 @@ impl PyOfflineClient { return py_result.into_py_any(py); } + // Try Vec + #[cfg(feature = "json")] + if let Ok(jsons) = messages.extract::>() { + let msgs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + let result = self + .0 + .encrypt_batch(&msgs, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = + result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute]", + "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute], or Vec[PEPJSONValue]", )) } diff --git a/src/lib/client/types.rs b/src/lib/client/types.rs index 5206968..ca5d42a 100644 --- a/src/lib/client/types.rs +++ b/src/lib/client/types.rs @@ -1,6 +1,6 @@ //! Client type definitions. -use crate::data::traits::{Encryptable, Encrypted}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted}; #[cfg(feature = "offline")] use crate::keys::GlobalPublicKeys; use crate::keys::{KeyProvider, SessionKeys}; @@ -71,13 +71,28 @@ impl Client { rng: &mut R, ) -> Result, crate::transcryptor::BatchError> where - M: Encryptable, + M: BatchEncryptable, SessionKeys: KeyProvider, R: Rng + CryptoRng, { super::batch::encrypt_batch(messages, self.keys.get_key(), rng) } + /// Encrypt a batch of messages without padding or preprocessing. + #[cfg(feature = "insecure")] + pub fn encrypt_batch_raw( + &self, + messages: &[M], + rng: &mut R, + ) -> Result, crate::transcryptor::BatchError> + where + M: Encryptable, + SessionKeys: KeyProvider, + R: Rng + CryptoRng, + { + super::batch::encrypt_batch_raw(messages, self.keys.get_key(), rng) + } + /// Decrypt a batch of encrypted messages with the appropriate session secret key. /// Automatically selects the correct key (pseudonym or attribute) based on the encrypted type. /// With the `elgamal3` feature, returns an error if any decryption fails. diff --git a/src/lib/client/wasm/distributed.rs b/src/lib/client/wasm/distributed.rs index 7bc93c4..3c3a902 100644 --- a/src/lib/client/wasm/distributed.rs +++ b/src/lib/client/wasm/distributed.rs @@ -396,6 +396,25 @@ impl WASMClient { WASMEncryptedPEPJSONValue(encrypted) } + /// Encrypt a PEPJSONValue using session keys. + #[cfg(all(feature = "json", feature = "batch"))] + #[wasm_bindgen(js_name = encryptJSONBatch)] + pub fn wasm_encrypt_json_batch( + &self, + values: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let mut rng = rand::rng(); + let rust_values: Vec<_> = values.into_iter().map(|m| m.0).collect(); + let encrypted = self + .0 + .encrypt_batch(&rust_values, &mut rng) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(encrypted + .into_iter() + .map(WASMEncryptedPEPJSONValue) + .collect()) + } + /// Decrypt an encrypted PEPJSONValue using session keys. 
#[cfg(all(feature = "json", feature = "elgamal3"))] #[wasm_bindgen(js_name = decryptJSON)] @@ -414,4 +433,34 @@ impl WASMClient { use crate::data::traits::Encrypted; WASMPEPJSONValue(encrypted.0.decrypt(&self.0.keys)) } + + /// Decrypt a batch of encrypted PEPJSONValues using session keys. + #[cfg(all(feature = "json", feature = "batch", feature = "elgamal3"))] + #[wasm_bindgen(js_name = decryptJSONBatch)] + pub fn wasm_decrypt_json_batch( + &self, + encrypted: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let rust_encrypted: Vec<_> = encrypted.into_iter().map(|e| e.0).collect(); + let decrypted = self + .0 + .decrypt_batch(&rust_encrypted) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(decrypted.into_iter().map(WASMPEPJSONValue).collect()) + } + + /// Decrypt a batch of encrypted PEPJSONValues using session keys. + #[cfg(all(feature = "json", feature = "batch", not(feature = "elgamal3")))] + #[wasm_bindgen(js_name = decryptJSONBatch)] + pub fn wasm_decrypt_json_batch( + &self, + encrypted: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let rust_encrypted: Vec<_> = encrypted.into_iter().map(|e| e.0).collect(); + let decrypted = self + .0 + .decrypt_batch(&rust_encrypted) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(decrypted.into_iter().map(WASMPEPJSONValue).collect()) + } } diff --git a/src/lib/client/wasm/types.rs b/src/lib/client/wasm/types.rs index 7b78072..7568df3 100644 --- a/src/lib/client/wasm/types.rs +++ b/src/lib/client/wasm/types.rs @@ -180,4 +180,23 @@ impl WASMOfflinePEPClient { let encrypted = value.0.encrypt_global(&self.0.global_public_keys, &mut rng); WASMEncryptedPEPJSONValue(encrypted) } + + /// Encrypt a batch of PEPJSONValues with global keys. + #[cfg(all(feature = "json", feature = "batch"))] + #[wasm_bindgen(js_name = encryptJSONBatch)] + pub fn wasm_encrypt_json_batch( + &self, + values: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let mut rng = rand::rng(); + let rust_values: Vec<_> = values.into_iter().map(|m| m.0).collect(); + let encrypted = self + .0 + .encrypt_batch(&rust_values, &mut rng) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(encrypted + .into_iter() + .map(WASMEncryptedPEPJSONValue) + .collect()) + } } diff --git a/src/lib/data/json/data.rs b/src/lib/data/json/data.rs index 631879f..baabf48 100644 --- a/src/lib/data/json/data.rs +++ b/src/lib/data/json/data.rs @@ -2,13 +2,14 @@ use super::utils::{bool_to_byte, byte_to_bool, bytes_to_number, number_to_bytes}; use crate::arithmetic::scalars::ScalarNonZero; +use crate::data::json::unify_structures; #[cfg(feature = "long")] use crate::data::long::{ LongAttribute, LongEncryptedAttribute, LongEncryptedPseudonym, LongPseudonym, }; use crate::data::padding::Padded; use crate::data::simple::{Attribute, EncryptedAttribute, EncryptedPseudonym, Pseudonym}; -use crate::data::traits::{Encryptable, Encrypted, Transcryptable}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted, Transcryptable}; use crate::factors::RerandomizeFactor; use crate::factors::TranscryptionInfo; #[cfg(feature = "offline")] @@ -16,12 +17,14 @@ use crate::keys::GlobalPublicKeys; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::GlobalSecretKeys; use crate::keys::SessionKeys; +use crate::transcryptor::BatchError; use rand_core::{CryptoRng, Rng}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; use thiserror::Error; + #[derive(Debug, Error)] pub enum 
JsonError { #[error("invalid boolean byte value: 0x{got:02x}. expected 0x00 or 0x01")] @@ -832,6 +835,25 @@ impl crate::data::traits::HasStructure for EncryptedPEPJSONValue { } } +#[cfg(feature = "batch")] +impl BatchEncryptable for PEPJSONValue { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + if items.is_empty() { + return Ok(Vec::new()); + } + + // Collect and unify structures + let structures: Vec<_> = items.iter().map(|v| v.structure()).collect(); + let unified = unify_structures(&structures)?; + + // Pad each item to unified structure + Ok(items + .iter() + .map(|item| item.pad_to(&unified)) + .collect::, _>>()?) + } +} + #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index cfa3321..5967e43 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -16,7 +16,9 @@ use crate::data::simple::{ Attribute, ElGamalEncryptable, ElGamalEncrypted, EncryptedAttribute, EncryptedPseudonym, Pseudonym, }; -use crate::data::traits::{Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable}; +use crate::data::traits::{ + BatchEncryptable, Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable, +}; use crate::factors::TranscryptionInfo; use crate::factors::{ AttributeRekeyInfo, PseudonymRekeyInfo, PseudonymizationInfo, RerandomizeFactor, @@ -35,6 +37,7 @@ use std::io::{Error, ErrorKind}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::{AttributeGlobalSecretKey, PseudonymGlobalSecretKey}; +use crate::transcryptor::BatchError; /// A collection of [Pseudonym]s that together represent a larger pseudonym value using PKCS#7 padding. /// @@ -991,6 +994,20 @@ impl crate::data::traits::HasStructure for LongEncryptedAttribute { } } +#[cfg(feature = "batch")] +impl BatchEncryptable for LongPseudonym { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + +#[cfg(feature = "batch")] +impl BatchEncryptable for LongAttribute { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + /// Internal helper function to encode bytes with PKCS#7 padding fn from_bytes_padded_impl(data: &[u8]) -> Vec { // Calculate number of full blocks diff --git a/src/lib/data/records.rs b/src/lib/data/records.rs index 15d34e1..d01c845 100644 --- a/src/lib/data/records.rs +++ b/src/lib/data/records.rs @@ -6,7 +6,7 @@ use crate::data::simple::{ Attribute, ElGamalEncrypted, EncryptedAttribute, EncryptedPseudonym, Pseudonym, }; -use crate::data::traits::{Encryptable, Encrypted, Transcryptable}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted, Transcryptable}; use crate::factors::TranscryptionInfo; #[cfg(feature = "offline")] use crate::keys::GlobalPublicKeys; @@ -23,6 +23,7 @@ use crate::data::long::{ #[cfg(feature = "batch")] use crate::data::traits::HasStructure; +use crate::transcryptor::BatchError; /// Structure descriptor for Records - describes the shape without the data. 
#[derive(Debug, Clone, PartialEq, Eq)] @@ -811,3 +812,17 @@ impl HasStructure for LongEncryptedRecord { } } } + +#[cfg(feature = "batch")] +impl BatchEncryptable for Record { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + +#[cfg(feature = "batch")] +impl BatchEncryptable for LongRecord { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} diff --git a/src/lib/data/simple.rs b/src/lib/data/simple.rs index 806758d..4f1ac93 100644 --- a/src/lib/data/simple.rs +++ b/src/lib/data/simple.rs @@ -4,12 +4,15 @@ use crate::arithmetic::group_elements::GroupElement; use crate::arithmetic::scalars::ScalarNonZero; use crate::core::elgamal::{ElGamal, ELGAMAL_LENGTH}; -use crate::data::traits::{Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable}; +use crate::data::traits::{ + BatchEncryptable, Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable, +}; use crate::factors::TranscryptionInfo; use crate::factors::{ AttributeRekeyInfo, PseudonymRekeyInfo, PseudonymizationInfo, RerandomizeFactor, }; use crate::keys::*; +use crate::transcryptor::BatchError; use derive_more::{Deref, From}; use rand_core::{CryptoRng, Rng}; #[cfg(feature = "serde")] @@ -509,6 +512,20 @@ impl crate::data::traits::HasStructure for EncryptedAttribute { fn structure(&self) -> Self::Structure {} } +#[cfg(feature = "batch")] +impl BatchEncryptable for Pseudonym { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + +#[cfg(feature = "batch")] +impl BatchEncryptable for Attribute { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { diff --git a/src/lib/data/traits.rs b/src/lib/data/traits.rs index 19fe557..c6d28a2 100644 --- a/src/lib/data/traits.rs +++ b/src/lib/data/traits.rs @@ -162,3 +162,10 @@ pub trait HasStructure { /// Get the structure of this encrypted value. fn structure(&self) -> Self::Structure; } + +#[cfg(feature = "batch")] +pub trait BatchEncryptable: Encryptable + Clone { + fn preprocess_batch( + items: &[Self], + ) -> Result, crate::transcryptor::batch::BatchError>; +} diff --git a/src/lib/data/wasm/json.rs b/src/lib/data/wasm/json.rs index 6bb39cb..4cde9e5 100644 --- a/src/lib/data/wasm/json.rs +++ b/src/lib/data/wasm/json.rs @@ -1,6 +1,6 @@ //! WASM bindings for PEP JSON encryption. -use crate::client::{decrypt, encrypt}; +use crate::client::{decrypt, encrypt, encrypt_batch, decrypt_batch}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::client::{decrypt_global, encrypt_global}; use crate::data::json::builder::PEPJSONBuilder; @@ -326,6 +326,32 @@ pub fn wasm_encrypt_json( WASMEncryptedPEPJSONValue(encrypted) } +/// Batch encrypt a list of PEPJSONValues using session keys. +/// All values must have the same structure, and the resulting encrypted values will be padded to match the maximum block counts for that structure. 
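+///
+/// # Example
+/// Illustrative JS sketch (variable names are hypothetical; the two values are assumed
+/// to share the same keys but differ in string length):
+/// ```js
+/// const batch = encryptJsonBatch([shortRecord, longRecord], sessionKeys);
+/// // both ciphertexts now report the same structure() and can be transcrypted together
+/// ```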
+///
+/// # Arguments
+/// * `values` - Array of PEPJSONValue objects to encrypt
+/// * `session_keys` - Session keys containing public and secret keys for both pseudonyms and attributes
+///
+/// # Returns
+/// An array of EncryptedPEPJSONValue objects, all with the same structure and padded to match the maximum block counts for that structure
+#[cfg(feature = "batch")]
+#[wasm_bindgen(js_name = encryptJsonBatch)]
+pub fn wasm_encrypt_json_batch(
+    values: Vec<WASMPEPJSONValue>,
+    session_keys: &WASMSessionKeys,
+) -> Result<Vec<WASMEncryptedPEPJSONValue>, JsValue> {
+    let mut rng = rand::rng();
+    let keys: SessionKeys = (*session_keys).into();
+    let rust_values: Vec<PEPJSONValue> = values.into_iter().map(|v| v.0).collect();
+    let encrypted = encrypt_batch(&rust_values, &keys, &mut rng)
+        .map_err(|e| JsValue::from_str(&format!("{}", e)))?;
+
+    Ok(encrypted
+        .into_iter()
+        .map(WASMEncryptedPEPJSONValue)
+        .collect())
+}
 /// Decrypt an EncryptedPEPJSONValue using session keys.
 ///
 /// # Arguments
@@ -350,6 +376,34 @@ pub fn wasm_decrypt_json(
     Ok(WASMPEPJSONValue(decrypted))
 }
+/// Decrypt a batch of EncryptedPEPJSONValues using session keys.
+///
+/// # Arguments
+///
+/// * `encrypted` - Array of EncryptedPEPJSONValue objects to decrypt
+/// * `session_keys` - Session keys containing public and secret keys for both pseudonyms and attributes
+///
+/// # Returns
+/// An array of PEPJSONValue objects
+///
+/// # Errors
+/// Returns an error if any value fails to decrypt
+#[cfg(feature = "batch")]
+#[wasm_bindgen(js_name = decryptJsonBatch)]
+pub fn wasm_decrypt_json_batch(
+    encrypted: Vec<WASMEncryptedPEPJSONValue>,
+    session_keys: &WASMSessionKeys,
+) -> Result<Vec<WASMPEPJSONValue>, JsValue> {
+    let keys: SessionKeys = (*session_keys).into();
+    let rust_encrypted: Vec<EncryptedPEPJSONValue> = encrypted.into_iter().map(|v| v.0).collect();
+    let decrypted = decrypt_batch(&rust_encrypted, &keys)
+        .map_err(|e| JsValue::from_str(&format!("{}", e)))?;
+
+    Ok(decrypted
+        .into_iter()
+        .map(WASMPEPJSONValue)
+        .collect())
+}
+
 /// Transcrypt a batch of EncryptedPEPJSONValues using a TranscryptionInfo object.
 ///
 /// # Arguments
diff --git a/src/lib/transcryptor/batch.rs b/src/lib/transcryptor/batch.rs
index 525a54b..d70ee2e 100644
--- a/src/lib/transcryptor/batch.rs
+++ b/src/lib/transcryptor/batch.rs
@@ -1,5 +1,6 @@
 //! Batch operations for pseudonymization, rekeying, and transcryption with shuffling.
 
+use crate::data::json::{JsonError, UnifyError};
 use crate::data::traits::{HasStructure, Pseudonymizable, Rekeyable, Transcryptable};
 use crate::factors::TranscryptionInfo;
 use rand_core::{CryptoRng, Rng};
@@ -21,6 +22,10 @@ pub enum BatchError {
         expected_structure: String,
         actual_structure: String,
     },
+    #[error(transparent)]
+    UnifyError(#[from] UnifyError),
+    #[error(transparent)]
+    JsonError(#[from] JsonError),
 }
 
 /// Fisher-Yates shuffle using rand_core
diff --git a/tests/python/test_json.py b/tests/python/test_json.py
index 613a7a1..5797b35 100644
--- a/tests/python/test_json.py
+++ b/tests/python/test_json.py
@@ -19,6 +19,7 @@
 )
 from libpep.client import (
     encrypt,
+    encrypt_batch,
     decrypt,
 )
 from libpep.data import json as pepjson
@@ -224,6 +225,82 @@ def test_json_batch_transcryption_different_structures(self):
             f"Error should mention structure mismatch, got: {context.exception}",
         )
+
+    def test_json_batch_transcryption_same_structure_different_lengths(self):
+        """
+        Test JSON batch transcryption where the values share the same structural shape
+        but differ in string length. Values encrypted individually cannot be transcrypted
+        as one batch, but encrypt_batch succeeds by padding them to a common structure.
+ """ + # Setup keys and secrets + global_keys = make_global_keys() + pseudo_secret = PseudonymizationSecret(b"pseudo-secret") + enc_secret = EncryptionSecret(b"encryption-secret") + + domain_a = PseudonymizationDomain("domain-a") + domain_b = PseudonymizationDomain("domain-b") + session = EncryptionContext("session-1") + + # global_keys[1] is the Public Key + session_keys = make_session_keys(global_keys[1], session, enc_secret) + + # Create two JSON values with same keys but different string lengths + data1 = { + "patient_id": "p1", + "diagnosis": "Flu", + "temperature": 38.5 + } + + data2 = { + "patient_id": "patient-002-with-a-very-long-id-that-changes-length", + "diagnosis": "Flu with a very long description to ensure structure length differs", + "temperature": 38.5 + } + + # Convert to PEP JSON + record1 = PEPJSONBuilder.from_json(data1, ["patient_id"]).build() + record2 = PEPJSONBuilder.from_json(data2, ["patient_id"]).build() + + # 1. Encrypt separately + encrypted1 = encrypt(record1, session_keys) + encrypted2 = encrypt(record2, session_keys) + + # Verify they have different structures due to length + self.assertNotEqual(encrypted1.structure(), encrypted2.structure()) + + transcryption_info = TranscryptionInfo( + domain_a, domain_b, session, session, pseudo_secret, enc_secret + ) + + # 2. Attempt batch transcryption (should fail because structures are not identical) + with self.assertRaises(Exception) as cm: + transcrypt_json_batch([encrypted1, encrypted2], transcryption_info) + + self.assertIn("structure", str(cm.exception).lower()) + + # 3. Use encrypt_json_batch (this automatically pads both to the same structure) + # Note: Depending on your specific pyo3 mapping, this might be in pepjson or client + encrypted_batch = encrypt_batch([record1, record2], session_keys) + + # Verify that the padded structures are now identical + self.assertEqual( + encrypted_batch[0].structure(), + encrypted_batch[1].structure(), + "encrypt_json_batch should have unified the structures via padding" + ) + + # 4. 
Batch transcrypt the normalized records (should succeed) + transcrypted_batch = transcrypt_json_batch(encrypted_batch, transcryption_info) + + # Verify output + self.assertEqual(len(transcrypted_batch), 2) + + # Decrypt and check data integrity + dec_json1 = decrypt(transcrypted_batch[0], session_keys).to_json() + dec_json2 = decrypt(transcrypted_batch[1], session_keys).to_json() + + self.assertEqual(dec_json1["diagnosis"], "Flu") + self.assertEqual(dec_json2["diagnosis"], "Flu with a very long description to ensure structure length differs") + if __name__ == "__main__": unittest.main() diff --git a/tests/wasm/json.test.js b/tests/wasm/json.test.js index 59da773..ac23f74 100644 --- a/tests/wasm/json.test.js +++ b/tests/wasm/json.test.js @@ -9,7 +9,7 @@ const { PseudonymizationSecret, EncryptionSecret, PseudonymizationDomain, - EncryptionContext, + EncryptionContext, encryptJsonBatch, } = require("../../pkg/libpep.js"); test('test json transcryption with builder', async () => { @@ -203,3 +203,65 @@ test('test json batch transcryption different structures', async () => { transcryptJsonBatch([encrypted1, encrypted2], transcryptionInfo); }).toThrow(/Inconsistent structure in batch/); }); + + +test('test json batch transcryption same structure different lengths', async () => { + // Setup keys and secrets + const globalKeys = makeGlobalKeys(); + const pseudoSecret = new PseudonymizationSecret(Uint8Array.from(Buffer.from("pseudo-secret"))); + const encSecret = new EncryptionSecret(Uint8Array.from(Buffer.from("encryption-secret"))); + + const domainA = new PseudonymizationDomain("domain-a"); + const domainB = new PseudonymizationDomain("domain-b"); + const session = new EncryptionContext("session-1"); + + const sessionKeys = makeSessionKeys(globalKeys.secret, session, encSecret); + + // Create two JSON values with DIFFERENT structures using JavaScript objects + const data1 = { + patient_id: "patient-001", + diagnosis: "Flu", + temperature: 38.5 + }; + + const data2 = { + patient_id: "patient-002 with a very long ID that makes the structure different", + diagnosis: "Flu but with very long description that makes the structure different", + temperature: 38.5 + }; + + // Convert to PEP JSON with different pseudonym fields + const record1 = PEPJSONBuilder.fromJson(data1, ["patient_id"]).build(); + const record2 = PEPJSONBuilder.fromJson(data2, ["patient_id"]).build(); + + // Encrypt both records + const encrypted1 = encryptJson(record1, sessionKeys); + const encrypted2 = encryptJson(record2, sessionKeys); + + // Verify they have different structures + const structure1 = encrypted1.structure(); + const structure2 = encrypted2.structure(); + expect(structure1.equals(structure2)).toBe(false); + + // Attempt batch transcryption (this should throw an error because structures don't match) + const transcryptionInfo = new TranscryptionInfo( + domainA, + domainB, + session, + session, + pseudoSecret, + encSecret + ); + + // Verify we get an error about structure mismatch + expect(() => { + transcryptJsonBatch([encrypted1, encrypted2], transcryptionInfo); + }).toThrow(/Inconsistent structure in batch/); + + // We can encrypt them in a batch which automatically adds padding to make structures consistent + const encryptedBatch = encryptJsonBatch([record1, record2], sessionKeys); + const transcryptedBatch = transcryptJsonBatch(encryptedBatch, transcryptionInfo); + + // Verify we got 2 records back + expect(transcryptedBatch.length).toBe(2); +}); From 16d38baf45abb985219bb0c190fa3f453da418a7 Mon Sep 17 00:00:00 2001 
From: Julian van der Horst Date: Tue, 17 Feb 2026 16:17:16 +0100 Subject: [PATCH 11/11] Done --- src/lib/client/py/batch.rs | 110 +++++++++++++++++-------------- src/lib/client/py/distributed.rs | 6 +- src/lib/client/py/functions.rs | 11 +++- src/lib/data/wasm/json.rs | 11 ++-- tests/python/test_json.py | 14 ++-- 5 files changed, 83 insertions(+), 69 deletions(-) diff --git a/src/lib/client/py/batch.rs b/src/lib/client/py/batch.rs index 5ab7231..ea70f2d 100644 --- a/src/lib/client/py/batch.rs +++ b/src/lib/client/py/batch.rs @@ -14,8 +14,8 @@ use crate::factors::py::contexts::{ use crate::factors::TranscryptionInfo; use crate::factors::{AttributeRekeyInfo, PseudonymizationInfo}; use crate::keys::py::types::{ - PyAttributeSessionPublicKey, PyAttributeSessionSecretKey, PyPseudonymSessionPublicKey, - PyPseudonymSessionSecretKey, PyGlobalPublicKeys, AttributeSessionKeys, PseudonymSessionKeys, + PyAttributeSessionPublicKey, PyAttributeSessionSecretKey, PyGlobalPublicKeys, + PyPseudonymSessionPublicKey, PyPseudonymSessionSecretKey, }; use crate::keys::types::{ AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, @@ -49,8 +49,8 @@ use crate::data::py::long::{ use crate::data::records::LongEncryptedRecord; #[cfg(feature = "json")] -use crate::data::py::json::{PyPEPJSONValue, PyEncryptedPEPJSONValue}; -use crate::keys::{GlobalPublicKeys, PseudonymSessionKeys, SessionKeys}; +use crate::data::py::json::{PyEncryptedPEPJSONValue, PyPEPJSONValue}; +use crate::keys::{GlobalPublicKeys, SessionKeys}; /// Polymorphic batch pseudonymization of a list of encrypted pseudonyms. /// Works with both EncryptedPseudonym and LongEncryptedPseudonym. @@ -296,7 +296,8 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P let result = encrypt_batch(&rust_msgs, &key, &mut rng) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; - let py_result: Vec = result.into_iter().map(PyEncryptedPseudonym).collect(); + let py_result: Vec = + result.into_iter().map(PyEncryptedPseudonym).collect(); return py_result.into_py_any(py); } } @@ -310,7 +311,8 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P let result = encrypt_batch(&rust_msgs, &key, &mut rng) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; - let py_result: Vec = result.into_iter().map(PyEncryptedAttribute).collect(); + let py_result: Vec = + result.into_iter().map(PyEncryptedAttribute).collect(); return py_result.into_py_any(py); } } @@ -325,7 +327,8 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P let result = encrypt_batch(&rust_msgs, &key, &mut rng) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; - let py_result: Vec = result.into_iter().map(PyLongEncryptedPseudonym).collect(); + let py_result: Vec = + result.into_iter().map(PyLongEncryptedPseudonym).collect(); return py_result.into_py_any(py); } } @@ -340,7 +343,8 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P let result = encrypt_batch(&rust_msgs, &key, &mut rng) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; - let py_result: Vec = result.into_iter().map(PyLongEncryptedAttribute).collect(); + let py_result: Vec = + result.into_iter().map(PyLongEncryptedAttribute).collect(); return py_result.into_py_any(py); } } @@ -348,24 +352,16 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P // Try Vec with PySessionKeys #[cfg(feature = "json")] if let Ok(jsons) = messages.extract::>() { - if let Ok(pk) = 
public_key.extract::() { - let keys = SessionKeys { - pseudonym: PseudonymSessionKeys { - public: PseudonymGlobalPublicKey(*pk.pseudonym.0), - secret: None, - }, - attribute: AttributeSessionKeys { - public: AttributeGlobalPublicKey(*pk.attribute.0), - secret: None, - }, - }; + if let Ok(session_keys) = public_key.extract::() { + let keys = SessionKeys::from(session_keys); let rust_msgs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); // True Batch: Calculates unified padding for JSON structures let result = encrypt_batch(&rust_msgs, &keys, &mut rng) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; - let py_result: Vec = result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + let py_result: Vec = + result.into_iter().map(PyEncryptedPEPJSONValue).collect(); return py_result.into_py_any(py); } } @@ -392,8 +388,9 @@ pub fn py_decrypt_batch( let rust_encs: Vec<_> = eps.into_iter().map(|e| e.0).collect(); // True Batch Decryption - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyPseudonym).collect(); return py_result.into_py_any(py); @@ -406,8 +403,9 @@ pub fn py_decrypt_batch( let key = AttributeSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = eas.into_iter().map(|e| e.0).collect(); - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyAttribute).collect(); return py_result.into_py_any(py); @@ -421,8 +419,9 @@ pub fn py_decrypt_batch( let key = PseudonymSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = leps.into_iter().map(|e| e.0).collect(); - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyLongPseudonym).collect(); return py_result.into_py_any(py); @@ -436,8 +435,9 @@ pub fn py_decrypt_batch( let key = AttributeSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = leas.into_iter().map(|e| e.0).collect(); - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyLongAttribute).collect(); return py_result.into_py_any(py); @@ -452,8 +452,9 @@ pub fn py_decrypt_batch( let keys = SessionKeys::from(k); // True Batch Decryption: handles unpadding of JSON structures - let result = decrypt_batch(&rust_encs, &keys) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &keys).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); return 
py_result.into_py_any(py); @@ -467,6 +468,9 @@ pub fn py_decrypt_batch( /// Polymorphic batch decryption. /// Decrypts a list of encrypted messages with a session secret key. +#[cfg(not(feature = "elgamal3"))] +#[pyfunction] +#[pyo3(name = "decrypt_batch")] pub fn py_decrypt_batch( encrypted: &Bound, secret_key: &Bound, @@ -479,9 +483,9 @@ pub fn py_decrypt_batch( let key = PseudonymSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = eps.into_iter().map(|e| e.0).collect(); - // True Batch Decryption: handles potential reordering/shuffling - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyPseudonym).collect(); return py_result.into_py_any(py); @@ -494,8 +498,9 @@ pub fn py_decrypt_batch( let key = AttributeSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = eas.into_iter().map(|e| e.0).collect(); - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyAttribute).collect(); return py_result.into_py_any(py); @@ -509,8 +514,9 @@ pub fn py_decrypt_batch( let key = PseudonymSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = leps.into_iter().map(|e| e.0).collect(); - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyLongPseudonym).collect(); return py_result.into_py_any(py); @@ -524,8 +530,9 @@ pub fn py_decrypt_batch( let key = AttributeSessionSecretKey::from(sk.0 .0); let rust_encs: Vec<_> = leas.into_iter().map(|e| e.0).collect(); - let result = decrypt_batch(&rust_encs, &key) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyLongAttribute).collect(); return py_result.into_py_any(py); @@ -535,16 +542,13 @@ pub fn py_decrypt_batch( // Try Vec with PySessionKeys #[cfg(feature = "json")] if let Ok(jsons) = encrypted.extract::>() { - if let Ok(pk) = secret_key.extract::() { - let keys = GlobalPublicKeys { - pseudonym: PseudonymGlobalPublicKey(*pk.pseudonym.0), - attribute: AttributeGlobalPublicKey(*pk.attribute.0) - }; + if let Ok(k) = secret_key.extract::() { let rust_encs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + let keys = SessionKeys::from(k); - // True Batch Decryption: handles automatic unpadding of JSON data - let result = decrypt_batch(&rust_encs, &keys) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)))?; + let result = decrypt_batch(&rust_encs, &keys).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); return py_result.into_py_any(py); @@ 
-618,14 +622,18 @@ pub fn py_encrypt_global_batch( } } - // Try Vec with GlobalKeys + // Try Vec with GlobalPublicKeys #[cfg(feature = "json")] if let Ok(jsons) = messages.extract::>() { - if let Ok(keys) = public_key.extract::() { - let info = TranscryptionInfo::from(keys); + if let Ok(pk) = public_key.extract::() { + use crate::data::traits::Encryptable; + let keys = GlobalPublicKeys { + pseudonym: PseudonymGlobalPublicKey(pk.pseudonym.0 .0), + attribute: AttributeGlobalPublicKey(pk.attribute.0 .0), + }; let result: Vec = jsons .into_iter() - .map(|j| PyEncryptedPEPJSONValue(j.0.encrypt_global(&info, &mut rng))) + .map(|j| PyEncryptedPEPJSONValue(j.0.encrypt_global(&keys, &mut rng))) .collect(); return result.into_py_any(py); } diff --git a/src/lib/client/py/distributed.rs b/src/lib/client/py/distributed.rs index 527d32f..c45ac3d 100644 --- a/src/lib/client/py/distributed.rs +++ b/src/lib/client/py/distributed.rs @@ -444,7 +444,8 @@ impl PyClient { #[cfg(feature = "json")] if let Ok(leas) = encrypted.extract::>() { let enc: Vec<_> = leas.into_iter().map(|e| e.0).collect(); - let result = self .0 + let result = self + .0 .decrypt_batch(&enc) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); @@ -512,7 +513,8 @@ impl PyClient { #[cfg(feature = "json")] if let Ok(leas) = encrypted.extract::>() { let enc: Vec<_> = leas.into_iter().map(|e| e.0).collect(); - let result = self .0 + let result = self + .0 .decrypt_batch(&enc) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); diff --git a/src/lib/client/py/functions.rs b/src/lib/client/py/functions.rs index 8b12c19..cf904cd 100644 --- a/src/lib/client/py/functions.rs +++ b/src/lib/client/py/functions.rs @@ -18,7 +18,9 @@ use crate::data::py::simple::{ PyAttribute, PyEncryptedAttribute, PyEncryptedPseudonym, PyPseudonym, }; #[cfg(feature = "offline")] -use crate::keys::py::types::{PyAttributeGlobalPublicKey, PyPseudonymGlobalPublicKey, PyGlobalPublicKeys}; +use crate::keys::py::types::{ + PyAttributeGlobalPublicKey, PyGlobalPublicKeys, PyPseudonymGlobalPublicKey, +}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::py::types::{PyAttributeGlobalSecretKey, PyPseudonymGlobalSecretKey}; use crate::keys::py::PySessionKeys; @@ -30,7 +32,10 @@ use crate::keys::py::{ use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::{AttributeGlobalSecretKey, PseudonymGlobalSecretKey}; -use crate::keys::{AttributeSessionPublicKey, AttributeSessionSecretKey, GlobalPublicKeys, PseudonymSessionPublicKey, PseudonymSessionSecretKey, SessionKeys}; +use crate::keys::{ + AttributeSessionPublicKey, AttributeSessionSecretKey, GlobalPublicKeys, + PseudonymSessionPublicKey, PseudonymSessionSecretKey, SessionKeys, +}; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3::types::PyAny; @@ -370,7 +375,7 @@ pub fn py_encrypt_global(message: &Bound, public_key: &Bound) -> P if let Ok(pk) = public_key.extract::() { let keys = GlobalPublicKeys { pseudonym: PseudonymGlobalPublicKey(*pk.pseudonym.0), - attribute: AttributeGlobalPublicKey(*pk.attribute.0) + attribute: AttributeGlobalPublicKey(*pk.attribute.0), }; let result = encrypt_global(&json.0, &keys, &mut rng); return Ok(Py::new(py, PyEncryptedPEPJSONValue(result))?.into_any()); diff --git a/src/lib/data/wasm/json.rs 
b/src/lib/data/wasm/json.rs
index 4cde9e5..3007be6 100644
--- a/src/lib/data/wasm/json.rs
+++ b/src/lib/data/wasm/json.rs
@@ -1,6 +1,6 @@
 //! WASM bindings for PEP JSON encryption.
 
-use crate::client::{decrypt, encrypt, encrypt_batch, decrypt_batch};
+use crate::client::{decrypt, decrypt_batch, encrypt, encrypt_batch};
 #[cfg(all(feature = "offline", feature = "insecure"))]
 use crate::client::{decrypt_global, encrypt_global};
 use crate::data::json::builder::PEPJSONBuilder;
@@ -395,13 +395,10 @@ pub fn wasm_decrypt_json_batch(
 ) -> Result<Vec<WASMPEPJSONValue>, JsValue> {
     let keys: SessionKeys = (*session_keys).into();
     let rust_encrypted: Vec<EncryptedPEPJSONValue> = encrypted.into_iter().map(|v| v.0).collect();
-    let decrypted = decrypt_batch(&rust_encrypted, &keys)
-        .map_err(|e| JsValue::from_str(&format!("{}", e)))?;
+    let decrypted =
+        decrypt_batch(&rust_encrypted, &keys).map_err(|e| JsValue::from_str(&format!("{}", e)))?;
 
-    Ok(decrypted
-        .into_iter()
-        .map(WASMPEPJSONValue)
-        .collect())
+    Ok(decrypted.into_iter().map(WASMPEPJSONValue).collect())
 }
 
 /// Transcrypt a batch of EncryptedPEPJSONValues using a TranscryptionInfo object.
 ///
 /// # Arguments
diff --git a/tests/python/test_json.py b/tests/python/test_json.py
index 5797b35..77cdb9e 100644
--- a/tests/python/test_json.py
+++ b/tests/python/test_json.py
@@ -294,12 +294,14 @@ def test_json_batch_transcryption_same_structure_different_lengths(self):
         # Verify output
         self.assertEqual(len(transcrypted_batch), 2)
 
-        # Decrypt and check data integrity
-        dec_json1 = decrypt(transcrypted_batch[0], session_keys).to_json()
-        dec_json2 = decrypt(transcrypted_batch[1], session_keys).to_json()
-
-        self.assertEqual(dec_json1["diagnosis"], "Flu")
-        self.assertEqual(dec_json2["diagnosis"], "Flu with a very long description to ensure structure length differs")
+        # Decrypt and check data integrity (order may be shuffled by batch transcryption)
+        decrypted_jsons = [
+            decrypt(v, session_keys).to_json() for v in transcrypted_batch
+        ]
+        diagnoses = {d["diagnosis"] for d in decrypted_jsons}
+
+        self.assertIn("Flu", diagnoses)
+        self.assertIn("Flu with a very long description to ensure structure length differs", diagnoses)
 
 
 if __name__ == "__main__":