diff --git a/Cargo.toml b/Cargo.toml index 5cb4afa..be2aad6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libpep" edition = "2021" -version = "0.11.1" +version = "0.12.0" authors = ["Bernard van Gastel ", "Job Doesburg "] homepage = "https://github.com/NOLAI/libpep" repository = "https://github.com/NOLAI/libpep" diff --git a/package.json b/package.json index 239109f..278e549 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@nolai/libpep-wasm", - "version": "0.11.1", + "version": "0.12.0", "description": "Library for polymorphic encryption and pseudonymization (in WASM)", "repository": { "type": "git", diff --git a/src/lib/client/batch.rs b/src/lib/client/batch.rs index c523cd2..478a244 100644 --- a/src/lib/client/batch.rs +++ b/src/lib/client/batch.rs @@ -1,6 +1,6 @@ //! Batch operations for encryption and decryption. -use crate::data::traits::{Encryptable, Encrypted}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted}; use crate::transcryptor::batch::BatchError; use rand_core::{CryptoRng, Rng}; @@ -17,6 +17,23 @@ pub fn encrypt_batch( public_key: &M::PublicKeyType, rng: &mut R, ) -> Result, BatchError> +where + M: BatchEncryptable, + R: Rng + CryptoRng, +{ + let preprocessed = M::preprocess_batch(messages)?; + Ok(preprocessed + .iter() + .map(|x| x.encrypt(public_key, rng)) + .collect()) +} + +#[cfg(feature = "insecure")] +pub fn encrypt_batch_raw( + messages: &[M], + public_key: &M::PublicKeyType, + rng: &mut R, +) -> Result, BatchError> where M: Encryptable, R: Rng + CryptoRng, diff --git a/src/lib/client/functions.rs b/src/lib/client/functions.rs index 1dde3bb..e100811 100644 --- a/src/lib/client/functions.rs +++ b/src/lib/client/functions.rs @@ -40,7 +40,8 @@ where /// # Examples /// ```rust,ignore /// let pseudonym = decrypt(&encrypted_pseudonym, &pseudonym_key); -/// let attribute = decrypt(&encrypted_attribute, &attribute_key); +/// let attribute = decrypt(&encrypted_attribute, +/// &attribute_key); /// ``` #[cfg(not(feature = "elgamal3"))] pub fn decrypt(encrypted: &E, secret_key: &E::SecretKeyType) -> E::UnencryptedType diff --git a/src/lib/client/py/batch.rs b/src/lib/client/py/batch.rs index 025037f..ea70f2d 100644 --- a/src/lib/client/py/batch.rs +++ b/src/lib/client/py/batch.rs @@ -1,6 +1,6 @@ //! Python bindings for batch transcryption operations. -use crate::client::{decrypt, encrypt}; +use crate::client::{decrypt_batch, encrypt_batch}; use crate::data::py::records::PyEncryptedRecord; #[cfg(feature = "long")] use crate::data::py::records::PyLongEncryptedRecord; @@ -14,8 +14,8 @@ use crate::factors::py::contexts::{ use crate::factors::TranscryptionInfo; use crate::factors::{AttributeRekeyInfo, PseudonymizationInfo}; use crate::keys::py::types::{ - PyAttributeSessionPublicKey, PyAttributeSessionSecretKey, PyPseudonymSessionPublicKey, - PyPseudonymSessionSecretKey, + PyAttributeSessionPublicKey, PyAttributeSessionSecretKey, PyGlobalPublicKeys, + PyPseudonymSessionPublicKey, PyPseudonymSessionSecretKey, }; use crate::keys::types::{ AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, @@ -49,7 +49,8 @@ use crate::data::py::long::{ use crate::data::records::LongEncryptedRecord; #[cfg(feature = "json")] -use crate::data::py::json::PyEncryptedPEPJSONValue; +use crate::data::py::json::{PyEncryptedPEPJSONValue, PyPEPJSONValue}; +use crate::keys::{GlobalPublicKeys, SessionKeys}; /// Polymorphic batch pseudonymization of a list of encrypted pseudonyms. 
/// Works with both EncryptedPseudonym and LongEncryptedPseudonym. @@ -285,53 +286,83 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P let py = messages.py(); let mut rng = rand::rng(); - // Try Vec with PseudonymSessionPublicKey + // Try Vec with PyPseudonymSessionPublicKey if let Ok(ps) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = PseudonymSessionPublicKey::from(pk.0 .0); - let result: Vec = ps - .into_iter() - .map(|p| PyEncryptedPseudonym(encrypt(&p.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = ps.into_iter().map(|p| p.0).collect(); + + // True Batch: Shuffles and encrypts + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = + result.into_iter().map(PyEncryptedPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionPublicKey + // Try Vec with PyAttributeSessionPublicKey if let Ok(attrs) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = AttributeSessionPublicKey::from(pk.0 .0); - let result: Vec = attrs - .into_iter() - .map(|a| PyEncryptedAttribute(encrypt(&a.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = attrs.into_iter().map(|a| a.0).collect(); + + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = + result.into_iter().map(PyEncryptedAttribute).collect(); + return py_result.into_py_any(py); } } - // Try Vec with PseudonymSessionPublicKey + // Try Vec #[cfg(feature = "long")] if let Ok(lps) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = PseudonymSessionPublicKey::from(pk.0 .0); - let result: Vec = lps - .into_iter() - .map(|p| PyLongEncryptedPseudonym(encrypt(&p.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = lps.into_iter().map(|p| p.0).collect(); + + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = + result.into_iter().map(PyLongEncryptedPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionPublicKey + // Try Vec #[cfg(feature = "long")] if let Ok(las) = messages.extract::>() { if let Ok(pk) = public_key.extract::() { let key = AttributeSessionPublicKey::from(pk.0 .0); - let result: Vec = las - .into_iter() - .map(|a| PyLongEncryptedAttribute(encrypt(&a.0, &key, &mut rng))) - .collect(); - return result.into_py_any(py); + let rust_msgs: Vec<_> = las.into_iter().map(|a| a.0).collect(); + + let result = encrypt_batch(&rust_msgs, &key, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = + result.into_iter().map(PyLongEncryptedAttribute).collect(); + return py_result.into_py_any(py); + } + } + + // Try Vec with PySessionKeys + #[cfg(feature = "json")] + if let Ok(jsons) = messages.extract::>() { + if let Ok(session_keys) = public_key.extract::() { + let keys = SessionKeys::from(session_keys); + let rust_msgs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + + // True Batch: Calculates unified padding for JSON structures + let result = encrypt_batch(&rust_msgs, &keys, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + + let py_result: Vec = + 
result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + return py_result.into_py_any(py); } } @@ -339,7 +370,6 @@ pub fn py_encrypt_batch(messages: &Bound, public_key: &Bound) -> P "encrypt_batch() requires (Vec[unencrypted_type], matching_public_key)", )) } - /// Polymorphic batch decryption. /// Decrypts a list of encrypted messages with a session secret key. #[cfg(feature = "elgamal3")] @@ -351,69 +381,83 @@ pub fn py_decrypt_batch( ) -> PyResult> { let py = encrypted.py(); - // Try Vec with PseudonymSessionSecretKey + // Try Vec with PyPseudonymSessionSecretKey if let Ok(eps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = eps - .into_iter() - .map(|ep| { - decrypt(&ep.0, &key) - .map(PyPseudonym) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = eps.into_iter().map(|e| e.0).collect(); + + // True Batch Decryption + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec with PyAttributeSessionSecretKey if let Ok(eas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = eas - .into_iter() - .map(|ea| { - decrypt(&ea.0, &key) - .map(PyAttribute) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = eas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyAttribute).collect(); + return py_result.into_py_any(py); } } - // Try Vec with PseudonymSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = leps - .into_iter() - .map(|lep| { - decrypt(&lep.0, &key) - .map(PyLongPseudonym) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = leps.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyLongPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec<_> = leas - .into_iter() - .map(|lea| { - decrypt(&lea.0, &key) - .map(PyLongAttribute) - .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) - }) - .collect::, _>>()?; - return result.into_py_any(py); + let rust_encs: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: 
{}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyLongAttribute).collect(); + return py_result.into_py_any(py); + } + } + + // Try Vec with PySessionKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(k) = secret_key.extract::() { + let rust_encs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + let keys = SessionKeys::from(k); + + // True Batch Decryption: handles unpadding of JSON structures + let result = decrypt_batch(&rust_encs, &keys).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); } } @@ -433,53 +477,81 @@ pub fn py_decrypt_batch( ) -> PyResult> { let py = encrypted.py(); - // Try Vec with PseudonymSessionSecretKey + // Try Vec with PyPseudonymSessionSecretKey if let Ok(eps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec = eps - .into_iter() - .map(|ep| PyPseudonym(decrypt(&ep.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = eps.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec with PyAttributeSessionSecretKey if let Ok(eas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec = eas - .into_iter() - .map(|ea| PyAttribute(decrypt(&ea.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = eas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyAttribute).collect(); + return py_result.into_py_any(py); } } - // Try Vec with PseudonymSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leps) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = PseudonymSessionSecretKey::from(sk.0 .0); - let result: Vec = leps - .into_iter() - .map(|lep| PyLongPseudonym(decrypt(&lep.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = leps.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyLongPseudonym).collect(); + return py_result.into_py_any(py); } } - // Try Vec with AttributeSessionSecretKey + // Try Vec #[cfg(feature = "long")] if let Ok(leas) = encrypted.extract::>() { if let Ok(sk) = secret_key.extract::() { let key = AttributeSessionSecretKey::from(sk.0 .0); - let result: Vec = leas - .into_iter() - .map(|lea| PyLongAttribute(decrypt(&lea.0, &key))) - .collect(); - return result.into_py_any(py); + let rust_encs: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + + let result = decrypt_batch(&rust_encs, &key).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyLongAttribute).collect(); + return py_result.into_py_any(py); + 
} + } + + // Try Vec with PySessionKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(k) = secret_key.extract::() { + let rust_encs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + let keys = SessionKeys::from(k); + + let result = decrypt_batch(&rust_encs, &keys).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Decryption failed: {}", e)) + })?; + + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); } } @@ -550,6 +622,23 @@ pub fn py_encrypt_global_batch( } } + // Try Vec with GlobalPublicKeys + #[cfg(feature = "json")] + if let Ok(jsons) = messages.extract::>() { + if let Ok(pk) = public_key.extract::() { + use crate::data::traits::Encryptable; + let keys = GlobalPublicKeys { + pseudonym: PseudonymGlobalPublicKey(pk.pseudonym.0 .0), + attribute: AttributeGlobalPublicKey(pk.attribute.0 .0), + }; + let result: Vec = jsons + .into_iter() + .map(|j| PyEncryptedPEPJSONValue(j.0.encrypt_global(&keys, &mut rng))) + .collect(); + return result.into_py_any(py); + } + } + Err(PyTypeError::new_err( "encrypt_global_batch() requires (Vec[unencrypted_type], matching_global_public_key)", )) @@ -632,6 +721,23 @@ pub fn py_decrypt_global_batch( } } + // Try Vec with GlobalKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(keys) = secret_key.extract::() { + let info = TranscryptionInfo::from(keys); + let result: Vec<_> = jsons + .into_iter() + .map(|j| { + j.0.decrypt_global(&info) + .map(PyPEPJSONValue) + .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Decryption failed")) + }) + .collect::, _>>()?; + return result.into_py_any(py); + } + } + Err(PyTypeError::new_err( "decrypt_global_batch() requires (Vec[encrypted_type], matching_global_secret_key)", )) @@ -698,6 +804,19 @@ pub fn py_decrypt_global_batch( } } + // Try Vec with GlobalKeys + #[cfg(feature = "json")] + if let Ok(jsons) = encrypted.extract::>() { + if let Ok(keys) = secret_key.extract::() { + let info = TranscryptionInfo::from(keys); + let result: Vec = jsons + .into_iter() + .map(|j| PyPEPJSONValue(j.0.decrypt_global(&info))) + .collect(); + return Ok(result.into_py_any(py)?); + } + } + Err(PyTypeError::new_err( "decrypt_global_batch() requires (Vec[encrypted_type], matching_global_secret_key)", )) diff --git a/src/lib/client/py/distributed.rs b/src/lib/client/py/distributed.rs index a5ec9ba..c45ac3d 100644 --- a/src/lib/client/py/distributed.rs +++ b/src/lib/client/py/distributed.rs @@ -370,8 +370,21 @@ impl PyClient { return py_result.into_py_any(py); } + // Try Vec - uses SessionKeys directly + #[cfg(feature = "json")] + if let Ok(las) = messages.extract::>() { + let msgs: Vec<_> = las.into_iter().map(|a| a.0).collect(); + let result = self + .0 + .encrypt_batch(&msgs, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = + result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute]", + "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute], or Vec[PEPJSONValue]", )) } @@ -427,8 +440,20 @@ impl PyClient { return py_result.into_py_any(py); } + // Try Vec - uses SessionKeys directly + #[cfg(feature = "json")] + if let Ok(leas) = encrypted.extract::>() { + let enc: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + let 
result = self + .0 + .decrypt_batch(&enc) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute]", + "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute], or Vec[EncryptedPEPJSONValue]", )) } @@ -484,8 +509,20 @@ impl PyClient { return py_result.into_py_any(py); } + // Try Vec - uses SessionKeys directly + #[cfg(feature = "json")] + if let Ok(leas) = encrypted.extract::>() { + let enc: Vec<_> = leas.into_iter().map(|e| e.0).collect(); + let result = self + .0 + .decrypt_batch(&enc) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = result.into_iter().map(PyPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute]", + "decrypt_batch() requires Vec[EncryptedPseudonym], Vec[EncryptedAttribute], Vec[LongEncryptedPseudonym], or Vec[LongEncryptedAttribute], or Vec[EncryptedPEPJSONValue]", )) } diff --git a/src/lib/client/py/functions.rs b/src/lib/client/py/functions.rs index 696603c..cf904cd 100644 --- a/src/lib/client/py/functions.rs +++ b/src/lib/client/py/functions.rs @@ -18,7 +18,9 @@ use crate::data::py::simple::{ PyAttribute, PyEncryptedAttribute, PyEncryptedPseudonym, PyPseudonym, }; #[cfg(feature = "offline")] -use crate::keys::py::types::{PyAttributeGlobalPublicKey, PyPseudonymGlobalPublicKey}; +use crate::keys::py::types::{ + PyAttributeGlobalPublicKey, PyGlobalPublicKeys, PyPseudonymGlobalPublicKey, +}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::py::types::{PyAttributeGlobalSecretKey, PyPseudonymGlobalSecretKey}; use crate::keys::py::PySessionKeys; @@ -31,8 +33,8 @@ use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::{AttributeGlobalSecretKey, PseudonymGlobalSecretKey}; use crate::keys::{ - AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, - PseudonymSessionSecretKey, SessionKeys, + AttributeSessionPublicKey, AttributeSessionSecretKey, GlobalPublicKeys, + PseudonymSessionPublicKey, PseudonymSessionSecretKey, SessionKeys, }; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; @@ -367,6 +369,19 @@ pub fn py_encrypt_global(message: &Bound, public_key: &Bound) -> P } } + // Try PEPJSONValue with SessionKeys + #[cfg(feature = "json")] + if let Ok(json) = message.extract::() { + if let Ok(pk) = public_key.extract::() { + let keys = GlobalPublicKeys { + pseudonym: PseudonymGlobalPublicKey(*pk.pseudonym.0), + attribute: AttributeGlobalPublicKey(*pk.attribute.0), + }; + let result = encrypt_global(&json.0, &keys, &mut rng); + return Ok(Py::new(py, PyEncryptedPEPJSONValue(result))?.into_any()); + } + } + Err(PyTypeError::new_err( "encrypt_global() requires (unencrypted_type, matching_global_public_key)", )) @@ -430,6 +445,18 @@ pub fn py_decrypt_global( } } + // Try EncryptedPEPJSONValue with SessionKeys + #[cfg(feature = "json")] + if let Ok(ej) = encrypted.extract::() { + if let Ok(sk) = secret_key.extract::() { + let keys: SessionKeys = 
sk.clone().into(); + if let Some(result) = decrypt_global(&ej.0, &keys) { + return Ok(Py::new(py, PyPEPJSONValue(result))?.into_any()); + } + return Err(pyo3::exceptions::PyValueError::new_err("Decryption failed")); + } + } + Err(PyTypeError::new_err( "decrypt_global() requires (encrypted_type, matching_global_secret_key)", )) @@ -484,6 +511,16 @@ pub fn py_decrypt_global( } } + // Try EncryptedPEPJSONValue with SessionKeys + #[cfg(feature = "json")] + if let Ok(ej) = encrypted.extract::() { + if let Ok(sk) = secret_key.extract::() { + let keys: SessionKeys = sk.clone().into(); + let result = decrypt_global(&ej.0, &keys); + return Ok(Py::new(py, PyPEPJSONValue(result))?.into_any()); + } + } + Err(PyTypeError::new_err( "decrypt_global() requires (encrypted_type, matching_global_secret_key)", )) @@ -628,6 +665,32 @@ pub fn py_encrypt_batch( } } + // Try PEPJSONValue + SessionKeys + #[cfg(feature = "json")] + if let Ok(sk) = key.extract::() { + if messages[0].extract::().is_ok() { + let rust_msgs: Vec<_> = messages + .iter() + .map(|m| { + m.extract::() + .expect("type already validated") + .0 + }) + .collect(); + let keys: SessionKeys = sk.clone().into(); + let encrypted = encrypt_batch(&rust_msgs, &keys, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{}", e)))?; + return Ok(encrypted + .into_iter() + .map(|e| { + Py::new(py, PyEncryptedPEPJSONValue(e)) + .expect("PyO3 allocation failed") + .into_any() + }) + .collect()); + } + } + Err(PyTypeError::new_err( "encrypt_batch() requires list of (Pseudonym|Attribute|LongPseudonym|LongAttribute) and matching key", )) @@ -748,6 +811,32 @@ pub fn py_decrypt_batch( } } + // Try EncryptedPEPJSONValue + SessionKeys + #[cfg(feature = "json")] + if let Ok(sk) = key.extract::() { + if encrypted[0].extract::().is_ok() { + let rust_encs: Vec<_> = encrypted + .iter() + .map(|e| { + e.extract::() + .expect("type already validated") + .0 + }) + .collect(); + let keys: SessionKeys = sk.clone().into(); + let decrypted = decrypt_batch(&rust_encs, &keys) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{}", e)))?; + return Ok(decrypted + .into_iter() + .map(|d| { + Py::new(py, PyPEPJSONValue(d)) + .expect("PyO3 allocation failed") + .into_any() + }) + .collect()); + } + } + Err(PyTypeError::new_err( "decrypt_batch() requires list of encrypted types and matching key", )) @@ -868,6 +957,32 @@ pub fn py_decrypt_batch( } } + // Try EncryptedPEPJSONValue + SessionKeys + #[cfg(feature = "json")] + if let Ok(sk) = key.extract::() { + if encrypted[0].extract::().is_ok() { + let rust_encs: Vec<_> = encrypted + .iter() + .map(|e| { + e.extract::() + .expect("type already validated") + .0 + }) + .collect(); + let keys: SessionKeys = sk.clone().into(); + let decrypted = decrypt_batch(&rust_encs, &keys) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{}", e)))?; + return Ok(decrypted + .into_iter() + .map(|d| { + Py::new(py, PyPEPJSONValue(d)) + .expect("PyO3 allocation failed") + .into_any() + }) + .collect()); + } + } + Err(PyTypeError::new_err( "decrypt_batch() requires list of encrypted types and matching key", )) diff --git a/src/lib/client/py/types.rs b/src/lib/client/py/types.rs index a4c16d1..42c1275 100644 --- a/src/lib/client/py/types.rs +++ b/src/lib/client/py/types.rs @@ -72,7 +72,7 @@ impl PyOfflineClient { } Err(PyTypeError::new_err( - "encrypt() requires Pseudonym, Attribute, LongPseudonym, or LongAttribute", + "encrypt() requires Pseudonym, Attribute, LongPseudonym, or LongAttribute, or PEPJSONValue", )) 
} @@ -133,8 +133,21 @@ impl PyOfflineClient { return py_result.into_py_any(py); } + // Try Vec + #[cfg(feature = "json")] + if let Ok(jsons) = messages.extract::>() { + let msgs: Vec<_> = jsons.into_iter().map(|j| j.0).collect(); + let result = self + .0 + .encrypt_batch(&msgs, &mut rng) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; + let py_result: Vec = + result.into_iter().map(PyEncryptedPEPJSONValue).collect(); + return py_result.into_py_any(py); + } + Err(PyTypeError::new_err( - "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute]", + "encrypt_batch() requires Vec[Pseudonym], Vec[Attribute], Vec[LongPseudonym], or Vec[LongAttribute], or Vec[PEPJSONValue]", )) } diff --git a/src/lib/client/types.rs b/src/lib/client/types.rs index 5206968..ca5d42a 100644 --- a/src/lib/client/types.rs +++ b/src/lib/client/types.rs @@ -1,6 +1,6 @@ //! Client type definitions. -use crate::data::traits::{Encryptable, Encrypted}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted}; #[cfg(feature = "offline")] use crate::keys::GlobalPublicKeys; use crate::keys::{KeyProvider, SessionKeys}; @@ -71,13 +71,28 @@ impl Client { rng: &mut R, ) -> Result, crate::transcryptor::BatchError> where - M: Encryptable, + M: BatchEncryptable, SessionKeys: KeyProvider, R: Rng + CryptoRng, { super::batch::encrypt_batch(messages, self.keys.get_key(), rng) } + /// Encrypt a batch of messages without padding or preprocessing. + #[cfg(feature = "insecure")] + pub fn encrypt_batch_raw( + &self, + messages: &[M], + rng: &mut R, + ) -> Result, crate::transcryptor::BatchError> + where + M: Encryptable, + SessionKeys: KeyProvider, + R: Rng + CryptoRng, + { + super::batch::encrypt_batch_raw(messages, self.keys.get_key(), rng) + } + /// Decrypt a batch of encrypted messages with the appropriate session secret key. /// Automatically selects the correct key (pseudonym or attribute) based on the encrypted type. /// With the `elgamal3` feature, returns an error if any decryption fails. diff --git a/src/lib/client/wasm/distributed.rs b/src/lib/client/wasm/distributed.rs index 7bc93c4..3c3a902 100644 --- a/src/lib/client/wasm/distributed.rs +++ b/src/lib/client/wasm/distributed.rs @@ -396,6 +396,25 @@ impl WASMClient { WASMEncryptedPEPJSONValue(encrypted) } + /// Encrypt a PEPJSONValue using session keys. + #[cfg(all(feature = "json", feature = "batch"))] + #[wasm_bindgen(js_name = encryptJSONBatch)] + pub fn wasm_encrypt_json_batch( + &self, + values: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let mut rng = rand::rng(); + let rust_values: Vec<_> = values.into_iter().map(|m| m.0).collect(); + let encrypted = self + .0 + .encrypt_batch(&rust_values, &mut rng) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(encrypted + .into_iter() + .map(WASMEncryptedPEPJSONValue) + .collect()) + } + /// Decrypt an encrypted PEPJSONValue using session keys. #[cfg(all(feature = "json", feature = "elgamal3"))] #[wasm_bindgen(js_name = decryptJSON)] @@ -414,4 +433,34 @@ impl WASMClient { use crate::data::traits::Encrypted; WASMPEPJSONValue(encrypted.0.decrypt(&self.0.keys)) } + + /// Decrypt a batch of encrypted PEPJSONValues using session keys. 
+ #[cfg(all(feature = "json", feature = "batch", feature = "elgamal3"))] + #[wasm_bindgen(js_name = decryptJSONBatch)] + pub fn wasm_decrypt_json_batch( + &self, + encrypted: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let rust_encrypted: Vec<_> = encrypted.into_iter().map(|e| e.0).collect(); + let decrypted = self + .0 + .decrypt_batch(&rust_encrypted) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(decrypted.into_iter().map(WASMPEPJSONValue).collect()) + } + + /// Decrypt a batch of encrypted PEPJSONValues using session keys. + #[cfg(all(feature = "json", feature = "batch", not(feature = "elgamal3")))] + #[wasm_bindgen(js_name = decryptJSONBatch)] + pub fn wasm_decrypt_json_batch( + &self, + encrypted: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let rust_encrypted: Vec<_> = encrypted.into_iter().map(|e| e.0).collect(); + let decrypted = self + .0 + .decrypt_batch(&rust_encrypted) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(decrypted.into_iter().map(WASMPEPJSONValue).collect()) + } } diff --git a/src/lib/client/wasm/types.rs b/src/lib/client/wasm/types.rs index 50f569a..7568df3 100644 --- a/src/lib/client/wasm/types.rs +++ b/src/lib/client/wasm/types.rs @@ -2,21 +2,27 @@ #[cfg(feature = "offline")] use crate::client::OfflineClient; -#[cfg(feature = "json")] +#[cfg(all(feature = "offline", feature = "json"))] use crate::data::wasm::json::{WASMEncryptedPEPJSONValue, WASMPEPJSONValue}; -#[cfg(feature = "long")] +#[cfg(all(feature = "offline", feature = "long"))] use crate::data::wasm::long::{ WASMLongAttribute, WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym, WASMLongPseudonym, }; -#[cfg(feature = "long")] +#[cfg(all(feature = "offline", feature = "long"))] use crate::data::wasm::records::{WASMLongRecord, WASMLongRecordEncrypted}; +#[cfg(feature = "offline")] use crate::data::wasm::records::{WASMRecord, WASMRecordEncrypted}; +#[cfg(feature = "offline")] use crate::data::wasm::simple::{ WASMAttribute, WASMEncryptedAttribute, WASMEncryptedPseudonym, WASMPseudonym, }; +#[cfg(feature = "offline")] use crate::keys::wasm::types::WASMGlobalPublicKeys; +#[cfg(feature = "offline")] use crate::keys::*; +#[cfg(feature = "offline")] use derive_more::{Deref, From, Into}; +#[cfg(feature = "offline")] use wasm_bindgen::prelude::*; /// An offline PEP client. @@ -174,4 +180,23 @@ impl WASMOfflinePEPClient { let encrypted = value.0.encrypt_global(&self.0.global_public_keys, &mut rng); WASMEncryptedPEPJSONValue(encrypted) } + + /// Encrypt a batch of PEPJSONValues with global keys. 
+ #[cfg(all(feature = "json", feature = "batch"))] + #[wasm_bindgen(js_name = encryptJSONBatch)] + pub fn wasm_encrypt_json_batch( + &self, + values: Vec, + ) -> Result, wasm_bindgen::JsValue> { + let mut rng = rand::rng(); + let rust_values: Vec<_> = values.into_iter().map(|m| m.0).collect(); + let encrypted = self + .0 + .encrypt_batch(&rust_values, &mut rng) + .map_err(|e| wasm_bindgen::JsValue::from_str(&format!("{}", e)))?; + Ok(encrypted + .into_iter() + .map(WASMEncryptedPEPJSONValue) + .collect()) + } } diff --git a/src/lib/data/json/data.rs b/src/lib/data/json/data.rs index 1bfd056..baabf48 100644 --- a/src/lib/data/json/data.rs +++ b/src/lib/data/json/data.rs @@ -2,13 +2,14 @@ use super::utils::{bool_to_byte, byte_to_bool, bytes_to_number, number_to_bytes}; use crate::arithmetic::scalars::ScalarNonZero; +use crate::data::json::unify_structures; #[cfg(feature = "long")] use crate::data::long::{ LongAttribute, LongEncryptedAttribute, LongEncryptedPseudonym, LongPseudonym, }; use crate::data::padding::Padded; use crate::data::simple::{Attribute, EncryptedAttribute, EncryptedPseudonym, Pseudonym}; -use crate::data::traits::{Encryptable, Encrypted, Transcryptable}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted, Transcryptable}; use crate::factors::RerandomizeFactor; use crate::factors::TranscryptionInfo; #[cfg(feature = "offline")] @@ -16,12 +17,14 @@ use crate::keys::GlobalPublicKeys; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::GlobalSecretKeys; use crate::keys::SessionKeys; +use crate::transcryptor::BatchError; use rand_core::{CryptoRng, Rng}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; use thiserror::Error; + #[derive(Debug, Error)] pub enum JsonError { #[error("invalid boolean byte value: 0x{got:02x}. expected 0x00 or 0x01")] @@ -44,6 +47,15 @@ pub enum JsonError { #[error("failed to parse string: {0}")] StringPadding(String), + + #[error("structure mismatch: expected {expected:?}, got {got:?}")] + StructureMismatch { + expected: super::structure::JSONStructure, + got: super::structure::JSONStructure, + }, + + #[error("cannot normalize: current size {current} exceeds target size {target}")] + SizeExceedsTarget { current: usize, target: usize }, } /// A JSON value where primitive types are stored as unencrypted PEP types. /// @@ -221,6 +233,231 @@ impl PEPJSONValue { } } } + + /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from any internal PKCS#7-style + /// padding used inside individual ciphertext blocks) to `LongString` and + /// `LongPseudonym` variants to ensure all instances have the same number of blocks + /// when encrypted. This is necessary for batch transcryption where all encrypted values must + /// have identical structure to prevent linkability. + /// + /// The external padding blocks are all-zero blocks `[0x00, 0x00, ...]` that contain no user data. + /// These padding blocks are automatically detected and removed during decoding, + /// ensuring the original values are perfectly preserved. + /// + /// # Parameters + /// + /// - `structure`: The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// Returns a padded `PEPJSONValue` with padding blocks added where necessary. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The current structure doesn't match the target structure type + /// - The current size exceeds the target size (cannot pad by removing blocks) + /// + /// # Example + /// + /// ```no_run + /// use libpep::data::json::data::PEPJSONValue; + /// use libpep::data::json::structure::JSONStructure; + /// use serde_json::json; + /// + /// let value1 = PEPJSONValue::from_value(&json!("hi")); + /// let value2 = PEPJSONValue::from_value(&json!("hello world")); + /// + /// // value2 has more blocks than value1 + /// // Pad value1 to match value2's structure + /// let target = JSONStructure::String(2); + /// let padded = value1.pad_to(&target).unwrap(); + /// ``` + pub fn pad_to(&self, structure: &super::structure::JSONStructure) -> Result { + use super::structure::JSONStructure; + + match (self, structure) { + (Self::Null, JSONStructure::Null) => Ok(Self::Null), + (Self::Bool(attr), JSONStructure::Bool) => Ok(Self::Bool(*attr)), + (Self::Number(attr), JSONStructure::Number) => Ok(Self::Number(*attr)), + + // Short string (1 block) + (Self::String(attr), JSONStructure::String(1)) => Ok(Self::String(*attr)), + + // Short string needs to be expanded to long string + #[cfg(feature = "long")] + (Self::String(attr), JSONStructure::String(target_blocks)) if *target_blocks > 1 => { + // Convert to LongAttribute with 1 block, then pad + let long_attr = LongAttribute::from(vec![*attr]); + let padded = long_attr.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_attr.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongString(padded)) + } + + // Long string normalization + #[cfg(feature = "long")] + (Self::LongString(long_attr), JSONStructure::String(target_blocks)) => { + let padded = long_attr.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_attr.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongString(padded)) + } + + // Short pseudonym (1 block) + (Self::Pseudonym(pseudo), JSONStructure::Pseudonym(1)) => Ok(Self::Pseudonym(*pseudo)), + + // Short pseudonym needs to be expanded to long pseudonym + #[cfg(feature = "long")] + (Self::Pseudonym(pseudo), JSONStructure::Pseudonym(target_blocks)) + if *target_blocks > 1 => + { + // Convert to LongPseudonym with 1 block, then pad + let long_pseudo = LongPseudonym::from(vec![*pseudo]); + let padded = long_pseudo.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_pseudo.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongPseudonym(padded)) + } + + // Long pseudonym normalization + #[cfg(feature = "long")] + (Self::LongPseudonym(long_pseudo), JSONStructure::Pseudonym(target_blocks)) => { + let padded = long_pseudo.pad_to(*target_blocks).map_err(|e| { + if e.kind() == std::io::ErrorKind::InvalidInput { + JsonError::SizeExceedsTarget { + current: long_pseudo.len(), + target: *target_blocks, + } + } else { + JsonError::StringPadding(format!("{e:?}")) + } + })?; + Ok(Self::LongPseudonym(padded)) + } + + // Array padding - recursively pad each element + (Self::Array(arr), JSONStructure::Array(target_structures)) => { + if arr.len() != target_structures.len() 
{ + return Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }); + } + + let padded: Result, _> = arr + .iter() + .zip(target_structures.iter()) + .map(|(value, target)| value.pad_to(target)) + .collect(); + + Ok(Self::Array(padded?)) + } + + // Object padding - recursively pad each field + (Self::Object(obj), JSONStructure::Object(target_fields)) => { + let mut padded = HashMap::new(); + + for (key, target_struct) in target_fields { + match obj.get(key) { + Some(value) => { + padded.insert(key.clone(), value.pad_to(target_struct)?); + } + None => { + return Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }); + } + } + } + + // Check for extra fields in the object + if obj.len() != target_fields.len() { + return Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }); + } + + Ok(Self::Object(padded)) + } + + // Mismatched structure types + _ => Err(JsonError::StructureMismatch { + expected: structure.clone(), + got: self.structure(), + }), + } + } + + /// Get the structure/shape of this PEPJSONValue. + /// + /// This returns a structure descriptor that captures the type and block count + /// of each field, without including the actual data values. + /// + /// # Example + /// + /// ```no_run + /// use libpep::data::json::data::PEPJSONValue; + /// use libpep::data::json::structure::JSONStructure; + /// use serde_json::json; + /// + /// let value = PEPJSONValue::from_value(&json!({ + /// "name": "Alice", + /// "age": 30 + /// })); + /// + /// let structure = value.structure(); + /// // structure describes the shape: Object with String(1) and Number fields + /// ``` + pub fn structure(&self) -> super::structure::JSONStructure { + use super::structure::JSONStructure; + + match self { + Self::Null => JSONStructure::Null, + Self::Bool(_) => JSONStructure::Bool, + Self::Number(_) => JSONStructure::Number, + Self::String(_) => JSONStructure::String(1), + #[cfg(feature = "long")] + Self::LongString(long_attr) => JSONStructure::String(long_attr.len()), + Self::Pseudonym(_) => JSONStructure::Pseudonym(1), + #[cfg(feature = "long")] + Self::LongPseudonym(long_pseudo) => JSONStructure::Pseudonym(long_pseudo.len()), + Self::Array(arr) => JSONStructure::Array(arr.iter().map(|v| v.structure()).collect()), + Self::Object(obj) => { + let mut fields: Vec<_> = obj + .iter() + .map(|(k, v)| (k.clone(), v.structure())) + .collect(); + fields.sort_by(|a, b| a.0.cmp(&b.0)); + JSONStructure::Object(fields) + } + } + } } impl Encryptable for PEPJSONValue { @@ -598,6 +835,25 @@ impl crate::data::traits::HasStructure for EncryptedPEPJSONValue { } } +#[cfg(feature = "batch")] +impl BatchEncryptable for PEPJSONValue { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + if items.is_empty() { + return Ok(Vec::new()); + } + + // Collect and unify structures + let structures: Vec<_> = items.iter().map(|v| v.structure()).collect(); + let unified = unify_structures(&structures)?; + + // Pad each item to unified structure + Ok(items + .iter() + .map(|item| item.pad_to(&unified)) + .collect::, _>>()?) 
+ } +} + #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { @@ -948,4 +1204,330 @@ mod tests { let decrypted2 = decrypt(&encrypted2, &keys); assert_eq!(decrypted1, decrypted2); } + + #[test] + #[cfg(feature = "long")] + fn normalize_short_string_to_long() { + use super::super::structure::JSONStructure; + + // Short string (1 block) + let short_value = PEPJSONValue::from_value(&json!("hi")); + assert_eq!(short_value.structure(), JSONStructure::String(1)); + + // Normalize to 3 blocks + let normalized = short_value.pad_to(&JSONStructure::String(3)).unwrap(); + assert_eq!(normalized.structure(), JSONStructure::String(3)); + + // Verify it's now a LongString + match normalized { + PEPJSONValue::LongString(ref long_attr) => { + assert_eq!(long_attr.len(), 3); + } + _ => panic!("Expected LongString after normalization"), + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_long_string_adds_padding() { + use super::super::structure::JSONStructure; + + // Long string (2 blocks) + let long_value = PEPJSONValue::from_value(&json!("This is a longer string")); + let initial_structure = long_value.structure(); + + // Get current block count + let current_blocks = match initial_structure { + JSONStructure::String(n) => n, + _ => panic!("Expected String structure"), + }; + + // Normalize to more blocks + let target_blocks = current_blocks + 2; + let normalized = long_value + .pad_to(&JSONStructure::String(target_blocks)) + .unwrap(); + assert_eq!(normalized.structure(), JSONStructure::String(target_blocks)); + + // Verify block count increased + match normalized { + PEPJSONValue::LongString(ref long_attr) => { + assert_eq!(long_attr.len(), target_blocks); + } + _ => panic!("Expected LongString"), + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_strings_different_sizes_encrypt_decrypt() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create strings of different sizes + let short = PEPJSONValue::from_value(&json!("hi")); + let medium = PEPJSONValue::from_value(&json!("hello world")); + let long = + PEPJSONValue::from_value(&json!("This is a much longer string with more content")); + + // Find the maximum block count + let max_blocks = [&short, &medium, &long] + .iter() + .map(|v| match v.structure() { + super::super::structure::JSONStructure::String(n) => n, + _ => 0, + }) + .max() + .unwrap(); + + // Normalize all to the same structure + let target = super::super::structure::JSONStructure::String(max_blocks); + let short_normalized = short.pad_to(&target).unwrap(); + let medium_normalized = medium.pad_to(&target).unwrap(); + let long_normalized = long.pad_to(&target).unwrap(); + + // All should have the same structure now + assert_eq!(short_normalized.structure(), target); + assert_eq!(medium_normalized.structure(), target); + assert_eq!(long_normalized.structure(), target); + + // Encrypt all values + let short_encrypted = encrypt(&short_normalized, &keys, &mut rng); + let medium_encrypted = encrypt(&medium_normalized, &keys, &mut rng); + let long_encrypted = encrypt(&long_normalized, &keys, &mut rng); + + // All encrypted values should have the same structure + assert_eq!(short_encrypted.structure(), medium_encrypted.structure()); + assert_eq!(medium_encrypted.structure(), long_encrypted.structure()); + + // Decrypt and verify original values are preserved + #[cfg(feature = "elgamal3")] + { + let short_decrypted = decrypt(&short_encrypted, &keys).unwrap(); + let medium_decrypted = decrypt(&medium_encrypted, &keys).unwrap(); + let 
long_decrypted = decrypt(&long_encrypted, &keys).unwrap(); + + assert_eq!(json!("hi"), short_decrypted.to_value().unwrap()); + assert_eq!(json!("hello world"), medium_decrypted.to_value().unwrap()); + assert_eq!( + json!("This is a much longer string with more content"), + long_decrypted.to_value().unwrap() + ); + } + + #[cfg(not(feature = "elgamal3"))] + { + let short_decrypted = decrypt(&short_encrypted, &keys); + let medium_decrypted = decrypt(&medium_encrypted, &keys); + let long_decrypted = decrypt(&long_encrypted, &keys); + + assert_eq!(json!("hi"), short_decrypted.to_value().unwrap()); + assert_eq!(json!("hello world"), medium_decrypted.to_value().unwrap()); + assert_eq!( + json!("This is a much longer string with more content"), + long_decrypted.to_value().unwrap() + ); + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_pseudonyms_different_sizes() { + use super::super::structure::JSONStructure; + use crate::pep_json; + + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create pseudonyms of different sizes + let short_pseudo = pep_json!(pseudonym("user123")); + let long_pseudo = pep_json!(pseudonym("user@example.com.with.a.very.long.domain")); + + // Find the maximum block count + let max_blocks = [&short_pseudo, &long_pseudo] + .iter() + .map(|v| match v.structure() { + JSONStructure::Pseudonym(n) => n, + _ => 0, + }) + .max() + .unwrap(); + + // Normalize both to the same structure + let target = JSONStructure::Pseudonym(max_blocks); + let short_normalized = short_pseudo.pad_to(&target).unwrap(); + let long_normalized = long_pseudo.pad_to(&target).unwrap(); + + // Both should have the same structure now + assert_eq!(short_normalized.structure(), target); + assert_eq!(long_normalized.structure(), target); + + // Encrypt and verify structures match + let short_encrypted = encrypt(&short_normalized, &keys, &mut rng); + let long_encrypted = encrypt(&long_normalized, &keys, &mut rng); + + assert_eq!(short_encrypted.structure(), long_encrypted.structure()); + + // Decrypt and verify original values are preserved + #[cfg(feature = "elgamal3")] + { + let short_decrypted = decrypt(&short_encrypted, &keys).unwrap(); + let long_decrypted = decrypt(&long_encrypted, &keys).unwrap(); + + assert_eq!(json!("user123"), short_decrypted.to_value().unwrap()); + assert_eq!( + json!("user@example.com.with.a.very.long.domain"), + long_decrypted.to_value().unwrap() + ); + } + + #[cfg(not(feature = "elgamal3"))] + { + let short_decrypted = decrypt(&short_encrypted, &keys); + let long_decrypted = decrypt(&long_encrypted, &keys); + + assert_eq!(json!("user123"), short_decrypted.to_value().unwrap()); + assert_eq!( + json!("user@example.com.with.a.very.long.domain"), + long_decrypted.to_value().unwrap() + ); + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_nested_objects_different_string_sizes() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create two objects with strings of different sizes + let obj1 = PEPJSONValue::from_value(&json!({ + "name": "Alice", + "email": "a@b.c" + })); + + let obj2 = PEPJSONValue::from_value(&json!({ + "name": "Bob", + "email": "bob.smith@example.com" + })); + + // Get structures + let struct1 = obj1.structure(); + let struct2 = obj2.structure(); + + // Use the public unify_structures function + let unified = super::super::structure::unify_structures(&[struct1, struct2]).unwrap(); + + // Normalize both objects + let obj1_normalized = obj1.pad_to(&unified).unwrap(); + let obj2_normalized = 
obj2.pad_to(&unified).unwrap(); + + // Both should have the same structure now + assert_eq!(obj1_normalized.structure(), obj2_normalized.structure()); + + // Encrypt both + let obj1_encrypted = encrypt(&obj1_normalized, &keys, &mut rng); + let obj2_encrypted = encrypt(&obj2_normalized, &keys, &mut rng); + + // Structures should match + assert_eq!(obj1_encrypted.structure(), obj2_encrypted.structure()); + + // Decrypt and verify original values + #[cfg(feature = "elgamal3")] + { + let obj1_decrypted = decrypt(&obj1_encrypted, &keys).unwrap(); + let obj2_decrypted = decrypt(&obj2_encrypted, &keys).unwrap(); + + assert_eq!( + json!({"name": "Alice", "email": "a@b.c"}), + obj1_decrypted.to_value().unwrap() + ); + assert_eq!( + json!({"name": "Bob", "email": "bob.smith@example.com"}), + obj2_decrypted.to_value().unwrap() + ); + } + + #[cfg(not(feature = "elgamal3"))] + { + let obj1_decrypted = decrypt(&obj1_encrypted, &keys); + let obj2_decrypted = decrypt(&obj2_encrypted, &keys); + + assert_eq!( + json!({"name": "Alice", "email": "a@b.c"}), + obj1_decrypted.to_value().unwrap() + ); + assert_eq!( + json!({"name": "Bob", "email": "bob.smith@example.com"}), + obj2_decrypted.to_value().unwrap() + ); + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_errors_when_size_exceeds_target() { + use super::super::structure::JSONStructure; + + // Create a long string (multiple blocks) + let long_value = PEPJSONValue::from_value(&json!( + "This is a very long string that will take multiple blocks" + )); + + let current_blocks = match long_value.structure() { + JSONStructure::String(n) => n, + _ => panic!("Expected String structure"), + }; + + // Try to normalize to fewer blocks - should fail + let result = long_value.pad_to(&JSONStructure::String(current_blocks - 1)); + assert!(result.is_err()); + + match result { + Err(JsonError::SizeExceedsTarget { current, target }) => { + assert_eq!(current, current_blocks); + assert_eq!(target, current_blocks - 1); + } + _ => panic!("Expected SizeExceedsTarget error"), + } + } + + #[test] + #[cfg(feature = "long")] + fn normalize_errors_on_structure_mismatch() { + use super::super::structure::JSONStructure; + + // Create a string value + let string_value = PEPJSONValue::from_value(&json!("hello")); + + // Try to normalize to a number structure - should fail + let result = string_value.pad_to(&JSONStructure::Number); + assert!(result.is_err()); + + match result { + Err(JsonError::StructureMismatch { expected, got }) => { + assert_eq!(expected, JSONStructure::Number); + assert_eq!(got, JSONStructure::String(1)); + } + _ => panic!("Expected StructureMismatch error"), + } + } + + #[test] + fn normalize_preserves_primitives() { + use super::super::structure::JSONStructure; + + // Test that null, bool, and number normalization works + let null_value = PEPJSONValue::from_value(&json!(null)); + let bool_value = PEPJSONValue::from_value(&json!(true)); + let number_value = PEPJSONValue::from_value(&json!(42)); + + let null_normalized = null_value.pad_to(&JSONStructure::Null).unwrap(); + let bool_normalized = bool_value.pad_to(&JSONStructure::Bool).unwrap(); + let number_normalized = number_value.pad_to(&JSONStructure::Number).unwrap(); + + assert_eq!(null_normalized, null_value); + assert_eq!(bool_normalized, bool_value); + assert_eq!(number_normalized, number_value); + } } diff --git a/src/lib/data/json/macros.rs b/src/lib/data/json/macros.rs index a2e30df..396f422 100644 --- a/src/lib/data/json/macros.rs +++ b/src/lib/data/json/macros.rs @@ -30,6 +30,20 @@ /// ``` 
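// Illustrative usage sketch (hypothetical, not part of the diff), mirroring the
// `macro_with_string_variables` and `macro_standalone_pseudonym_with_string` tests below.
// `user_id` is a made-up variable; any expression implementing `AsRef<str>` should work,
// since the macro now calls `.as_ref()` on it, so `String` variables work as well as literals:
//
//     let user_id = String::from("user@example.com");
//     let standalone = pep_json!(pseudonym(user_id.clone())); // Pseudonym when it fits a single block, otherwise LongPseudonym
//     let object = pep_json!({ "id": pseudonym(user_id) });   // pseudonym as an object field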
#[macro_export] macro_rules! pep_json { + // Entry point for standalone pseudonym + (pseudonym($value:expr)) => {{ + use $crate::data::padding::Padded; + let value = $value; + let s_str: &str = value.as_ref(); + // Always try short first, then fall back to long if needed + match $crate::data::simple::Pseudonym::from_string_padded(s_str) { + Ok(pseudo) => $crate::data::json::data::PEPJSONValue::Pseudonym(pseudo), + Err(_) => $crate::data::json::data::PEPJSONValue::LongPseudonym( + $crate::data::long::LongPseudonym::from_string_padded(s_str) + ) + } + }}; + // Entry point for object ({ $($tt:tt)* }) => {{ let builder = $crate::data::json::builder::PEPJSONBuilder::new(); @@ -43,12 +57,14 @@ macro_rules! pep_json { // Pseudonym field (last field, no trailing comma) (@object $builder:ident, $key:literal : pseudonym($value:expr)) => {{ - $builder.pseudonym($key, $value).build() + let value = $value; + $builder.pseudonym($key, value.as_ref()).build() }}; // Pseudonym field with more fields following (@object $builder:ident, $key:literal : pseudonym($value:expr), $($rest:tt)*) => {{ - let builder = $builder.pseudonym($key, $value); + let value = $value; + let builder = $builder.pseudonym($key, value.as_ref()); pep_json!(@object builder, $($rest)*) }}; @@ -245,4 +261,47 @@ mod tests { assert_eq!(expected, decrypted.to_value().unwrap()); } + + #[test] + fn macro_with_string_variables() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Test with String variables (not just string literals) + let user_id = String::from("user@example.com"); + let pep_value = pep_json!({ + "id": pseudonym(user_id) + }); + + let encrypted = encrypt(&pep_value, &keys, &mut rng); + #[cfg(feature = "elgamal3")] + let decrypted = decrypt(&encrypted, &keys).unwrap(); + + #[cfg(not(feature = "elgamal3"))] + let decrypted = decrypt(&encrypted, &keys); + + let expected = json!({ + "id": "user@example.com" + }); + + assert_eq!(expected, decrypted.to_value().unwrap()); + } + + #[test] + fn macro_standalone_pseudonym_with_string() { + // Test standalone pseudonym with String variable + let user_id = String::from("test@example.com"); + let pep_value = pep_json!(pseudonym(user_id)); + + // Verify it creates the correct variant + match pep_value { + crate::data::json::data::PEPJSONValue::Pseudonym(_) => { + // Expected for short pseudonyms + } + crate::data::json::data::PEPJSONValue::LongPseudonym(_) => { + // Also acceptable if string is long + } + _ => panic!("Expected Pseudonym or LongPseudonym variant"), + } + } } diff --git a/src/lib/data/json/structure.rs b/src/lib/data/json/structure.rs index 5010bfe..2b5d3c8 100644 --- a/src/lib/data/json/structure.rs +++ b/src/lib/data/json/structure.rs @@ -4,6 +4,24 @@ use super::data::EncryptedPEPJSONValue; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use thiserror::Error; + +/// Errors that can occur when unifying structures. +#[derive(Debug, Error, Clone, PartialEq, Eq)] +pub enum UnifyError { + #[error("Cannot unify empty list of structures")] + EmptyList, + + #[error("Incompatible structure types: {0:?} and {1:?}")] + IncompatibleTypes(JSONStructure, JSONStructure), + + #[error("Arrays have different lengths: {0} and {1}")] + ArrayLengthMismatch(usize, usize), + + #[error("Objects have different fields")] + ObjectFieldMismatch, +} /// Structure descriptor that describes the shape of an EncryptedPEPJSONValue without its actual encrypted data. 
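///
/// For example, the value `{"name": "Alice", "age": 30}` is described as an `Object` with
/// the fields `("age", Number)` and `("name", String(1))` (fields sorted by key, as
/// `PEPJSONValue::structure` produces them): only types and block counts are recorded,
/// never the encrypted data itself.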
/// @@ -23,6 +41,153 @@ pub enum JSONStructure { Object(Vec<(String, JSONStructure)>), } +/// Unifies multiple JSON structures by taking the maximum block count for each field. +/// +/// This function is useful for batch operations where you need to normalize multiple +/// values to have the same structure. It recursively unifies nested structures, +/// taking the maximum block count for strings and pseudonyms, and ensuring that +/// arrays and objects have compatible structures. +/// +/// # Parameters +/// +/// - `structures`: A slice of JSON structures to unify +/// +/// # Returns +/// +/// Returns a unified `JSONStructure` where: +/// - For `String` and `Pseudonym`: the maximum block count across all inputs +/// - For `Array`: recursively unified element structures (all arrays must have same length) +/// - For `Object`: recursively unified field structures (all objects must have same fields) +/// - For primitives (`Null`, `Bool`, `Number`): the same type (all must match) +/// +/// # Errors +/// +/// Returns an error if: +/// - The input list is empty +/// - Structures have incompatible types (e.g., mixing `String` with `Number`) +/// - Arrays have different lengths +/// - Objects have different sets of fields +/// +/// # Example +/// +/// ```no_run +/// use libpep::data::json::structure::{JSONStructure, unify_structures}; +/// +/// let struct1 = JSONStructure::String(1); // "hi" +/// let struct2 = JSONStructure::String(2); // "hello" +/// let struct3 = JSONStructure::String(3); // "hello world" +/// +/// let unified = unify_structures(&[struct1, struct2, struct3]).unwrap(); +/// assert_eq!(unified, JSONStructure::String(3)); // Maximum block count +/// ``` +/// +/// ## Object Example +/// +/// ```no_run +/// use libpep::data::json::structure::{JSONStructure, unify_structures}; +/// +/// let obj1 = JSONStructure::Object(vec![ +/// ("name".to_string(), JSONStructure::String(1)), +/// ("email".to_string(), JSONStructure::String(1)), +/// ]); +/// +/// let obj2 = JSONStructure::Object(vec![ +/// ("name".to_string(), JSONStructure::String(1)), +/// ("email".to_string(), JSONStructure::String(3)), +/// ]); +/// +/// let unified = unify_structures(&[obj1, obj2]).unwrap(); +/// // Result: email field has 3 blocks (max of 1 and 3) +/// ``` +pub fn unify_structures(structures: &[JSONStructure]) -> Result { + if structures.is_empty() { + return Err(UnifyError::EmptyList); + } + + if structures.len() == 1 { + return Ok(structures[0].clone()); + } + + // Start with the first structure and unify with all others + let mut unified = structures[0].clone(); + for structure in &structures[1..] { + unified = unify_two_structures(&unified, structure)?; + } + + Ok(unified) +} + +/// Helper function to unify two structures. 
+fn unify_two_structures( + s1: &JSONStructure, + s2: &JSONStructure, +) -> Result<JSONStructure, UnifyError> { + match (s1, s2) { + // Primitives must match exactly + (JSONStructure::Null, JSONStructure::Null) => Ok(JSONStructure::Null), + (JSONStructure::Bool, JSONStructure::Bool) => Ok(JSONStructure::Bool), + (JSONStructure::Number, JSONStructure::Number) => Ok(JSONStructure::Number), + + // Strings: take maximum block count + (JSONStructure::String(n1), JSONStructure::String(n2)) => { + Ok(JSONStructure::String(*n1.max(n2))) + } + + // Pseudonyms: take maximum block count + (JSONStructure::Pseudonym(n1), JSONStructure::Pseudonym(n2)) => { + Ok(JSONStructure::Pseudonym(*n1.max(n2))) + } + + // Arrays: must have same length, unify element-wise + (JSONStructure::Array(arr1), JSONStructure::Array(arr2)) => { + if arr1.len() != arr2.len() { + return Err(UnifyError::ArrayLengthMismatch(arr1.len(), arr2.len())); + } + + let unified_elements: Result<Vec<_>, _> = arr1 + .iter() + .zip(arr2.iter()) + .map(|(e1, e2)| unify_two_structures(e1, e2)) + .collect(); + + Ok(JSONStructure::Array(unified_elements?)) + } + + // Objects: must have same fields, unify field-wise + (JSONStructure::Object(fields1), JSONStructure::Object(fields2)) => { + // Convert to HashMaps for easier lookup (using owned String keys) + let map1: HashMap<String, &JSONStructure> = + fields1.iter().map(|(k, v)| (k.clone(), v)).collect(); + let map2: HashMap<String, &JSONStructure> = + fields2.iter().map(|(k, v)| (k.clone(), v)).collect(); + + // Check that both objects have the same set of keys + if map1.len() != map2.len() { + return Err(UnifyError::ObjectFieldMismatch); + } + + let mut unified_fields = Vec::new(); + for (key, val1) in &map1 { + match map2.get(key) { + Some(val2) => { + let unified_val = unify_two_structures(val1, val2)?; + unified_fields.push((key.clone(), unified_val)); + } + None => return Err(UnifyError::ObjectFieldMismatch), + } + } + + // Sort fields to ensure consistent ordering + unified_fields.sort_by(|a, b| a.0.cmp(&b.0)); + + Ok(JSONStructure::Object(unified_fields)) + } + + // Incompatible types + _ => Err(UnifyError::IncompatibleTypes(s1.clone(), s2.clone())), + } +} + /// Methods for extracting structure from EncryptedPEPJSONValue impl EncryptedPEPJSONValue { /// Get the structure/shape of this EncryptedPEPJSONValue @@ -260,4 +425,227 @@ mod tests { let deserialized: JSONStructure = serde_json::from_str(&json_str).unwrap(); assert_eq!(structure, deserialized); } + + #[test] + fn unify_strings_different_sizes() { + let s1 = JSONStructure::String(1); + let s2 = JSONStructure::String(2); + let s3 = JSONStructure::String(3); + + let unified = unify_structures(&[s1, s2, s3]).unwrap(); + assert_eq!(unified, JSONStructure::String(3)); + } + + #[test] + fn unify_pseudonyms_different_sizes() { + let p1 = JSONStructure::Pseudonym(1); + let p2 = JSONStructure::Pseudonym(4); + let p3 = JSONStructure::Pseudonym(2); + + let unified = unify_structures(&[p1, p2, p3]).unwrap(); + assert_eq!(unified, JSONStructure::Pseudonym(4)); + } + + #[test] + fn unify_primitives() { + let null_structures = vec![JSONStructure::Null, JSONStructure::Null]; + assert_eq!( + unify_structures(&null_structures).unwrap(), + JSONStructure::Null + ); + + let bool_structures = vec![JSONStructure::Bool, JSONStructure::Bool]; + assert_eq!( + unify_structures(&bool_structures).unwrap(), + JSONStructure::Bool + ); + + let num_structures = vec![ + JSONStructure::Number, + JSONStructure::Number, + JSONStructure::Number, + ]; + assert_eq!( + unify_structures(&num_structures).unwrap(), + JSONStructure::Number + ); + } + +
#[test] + fn unify_arrays() { + let arr1 = JSONStructure::Array(vec![JSONStructure::String(1), JSONStructure::Number]); + + let arr2 = JSONStructure::Array(vec![JSONStructure::String(3), JSONStructure::Number]); + + let unified = unify_structures(&[arr1, arr2]).unwrap(); + assert_eq!( + unified, + JSONStructure::Array(vec![JSONStructure::String(3), JSONStructure::Number,]) + ); + } + + #[test] + fn unify_objects() { + let obj1 = JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(1)), + ("email".to_string(), JSONStructure::String(1)), + ]); + + let obj2 = JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(2)), + ("email".to_string(), JSONStructure::String(3)), + ]); + + let unified = unify_structures(&[obj1, obj2]).unwrap(); + + // Check that the unified structure has max block counts + let expected = JSONStructure::Object(vec![ + ("email".to_string(), JSONStructure::String(3)), + ("name".to_string(), JSONStructure::String(2)), + ]); + + assert_eq!(unified, expected); + } + + #[test] + fn unify_nested_objects() { + let obj1 = JSONStructure::Object(vec![ + ( + "user".to_string(), + JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(1)), + ("id".to_string(), JSONStructure::Pseudonym(1)), + ]), + ), + ("count".to_string(), JSONStructure::Number), + ]); + + let obj2 = JSONStructure::Object(vec![ + ( + "user".to_string(), + JSONStructure::Object(vec![ + ("name".to_string(), JSONStructure::String(3)), + ("id".to_string(), JSONStructure::Pseudonym(2)), + ]), + ), + ("count".to_string(), JSONStructure::Number), + ]); + + let unified = unify_structures(&[obj1, obj2]).unwrap(); + + let expected = JSONStructure::Object(vec![ + ("count".to_string(), JSONStructure::Number), + ( + "user".to_string(), + JSONStructure::Object(vec![ + ("id".to_string(), JSONStructure::Pseudonym(2)), + ("name".to_string(), JSONStructure::String(3)), + ]), + ), + ]); + + assert_eq!(unified, expected); + } + + #[test] + fn unify_single_structure() { + let s = JSONStructure::String(5); + let unified = unify_structures(std::slice::from_ref(&s)).unwrap(); + assert_eq!(unified, s); + } + + #[test] + fn unify_empty_list_fails() { + let result = unify_structures(&[]); + assert!(matches!(result, Err(UnifyError::EmptyList))); + } + + #[test] + fn unify_incompatible_types_fails() { + let s1 = JSONStructure::String(1); + let s2 = JSONStructure::Number; + + let result = unify_structures(&[s1, s2]); + assert!(matches!(result, Err(UnifyError::IncompatibleTypes(_, _)))); + } + + #[test] + fn unify_arrays_different_lengths_fails() { + let arr1 = JSONStructure::Array(vec![JSONStructure::Number, JSONStructure::Number]); + let arr2 = JSONStructure::Array(vec![JSONStructure::Number]); + + let result = unify_structures(&[arr1, arr2]); + assert!(matches!(result, Err(UnifyError::ArrayLengthMismatch(2, 1)))); + } + + #[test] + fn unify_objects_different_fields_fails() { + let obj1 = JSONStructure::Object(vec![("name".to_string(), JSONStructure::String(1))]); + + let obj2 = JSONStructure::Object(vec![("email".to_string(), JSONStructure::String(1))]); + + let result = unify_structures(&[obj1, obj2]); + assert!(matches!(result, Err(UnifyError::ObjectFieldMismatch))); + } + + #[test] + fn unify_real_world_example() { + let mut rng = rand::rng(); + let keys = make_test_keys(); + + // Create three different user objects with varying string lengths + let user1 = PEPJSONValue::from_value(&json!({ + "name": "Alice", + "email": "a@b.c" + })); + + let user2 = PEPJSONValue::from_value(&json!({ + 
"name": "Bob", + "email": "bob@example.com" + })); + + let user3 = PEPJSONValue::from_value(&json!({ + "name": "Charlie Johnson", + "email": "charlie.johnson@verylongdomain.example.com" + })); + + // Encrypt them + let enc1 = encrypt(&user1, &keys, &mut rng); + let enc2 = encrypt(&user2, &keys, &mut rng); + let enc3 = encrypt(&user3, &keys, &mut rng); + + // Get their structures + let struct1 = enc1.structure(); + let struct2 = enc2.structure(); + let struct3 = enc3.structure(); + + // Unify the structures + let unified = unify_structures(&[struct1, struct2, struct3]).unwrap(); + + // The unified structure should have the maximum block count for each field + match unified { + JSONStructure::Object(fields) => { + // Find email and name fields + let email_struct = fields.iter().find(|(k, _)| k == "email").unwrap().1.clone(); + let name_struct = fields.iter().find(|(k, _)| k == "name").unwrap().1.clone(); + + // Email should have the max blocks from all three users + if let JSONStructure::String(email_blocks) = email_struct { + // user3's email is the longest + assert!(email_blocks >= 2); + } else { + panic!("Expected String structure for email"); + } + + // Name should have the max blocks from all three users + if let JSONStructure::String(name_blocks) = name_struct { + // user3's name is the longest + assert!(name_blocks >= 1); + } else { + panic!("Expected String structure for name"); + } + } + _ => panic!("Expected Object structure"), + } + } } diff --git a/src/lib/data/long.rs b/src/lib/data/long.rs index dca89e6..5967e43 100644 --- a/src/lib/data/long.rs +++ b/src/lib/data/long.rs @@ -1,21 +1,33 @@ //! Long (multi-block) data types for pseudonyms and attributes. //! //! This module provides support for multi-block pseudonyms and attributes that can hold -//! more than 16 bytes of data. These types are built on top of PKCS#7 padding. +//! more than 16 bytes of data. +//! +//! # Padding +//! +//! Long data types use PKCS#7 padding (internal padding) automatically for the last block. +//! They also support optional external padding via the `pad_to()` method for batch unlinkability. +//! +//! For detailed information about the two types of padding, see the [`padding`](crate::data::padding) module. 
use crate::arithmetic::scalars::ScalarNonZero; +use crate::data::padding::external::{create_external_padding_block, is_external_padding_block}; use crate::data::simple::{ Attribute, ElGamalEncryptable, ElGamalEncrypted, EncryptedAttribute, EncryptedPseudonym, Pseudonym, }; -use crate::data::traits::{Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable}; +use crate::data::traits::{ + BatchEncryptable, Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable, +}; use crate::factors::TranscryptionInfo; use crate::factors::{ AttributeRekeyInfo, PseudonymRekeyInfo, PseudonymizationInfo, RerandomizeFactor, }; +#[cfg(feature = "offline")] +use crate::keys::{AttributeGlobalPublicKey, PseudonymGlobalPublicKey}; use crate::keys::{ - AttributeGlobalPublicKey, AttributeSessionPublicKey, AttributeSessionSecretKey, - PseudonymGlobalPublicKey, PseudonymSessionPublicKey, PseudonymSessionSecretKey, + AttributeSessionPublicKey, AttributeSessionSecretKey, PseudonymSessionPublicKey, + PseudonymSessionSecretKey, }; use derive_more::{Deref, From}; use rand_core::{CryptoRng, Rng}; @@ -25,6 +37,7 @@ use std::io::{Error, ErrorKind}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::keys::{AttributeGlobalSecretKey, PseudonymGlobalSecretKey}; +use crate::transcryptor::BatchError; /// A collection of [Pseudonym]s that together represent a larger pseudonym value using PKCS#7 padding. /// @@ -161,6 +174,82 @@ impl LongPseudonym { .collect::>() .join("") } + + /// Adds **external padding** to reach a target number of blocks for batch unlinkability. + /// + /// ## Purpose: Batch Transcryption Unlinkability + /// + /// In batch transcryption, all values **must have identical structure** to prevent + /// linkability attacks. This method adds full padding blocks (external padding) to + /// normalize different-sized pseudonyms to the same structure without modifying content. + /// + /// ## How it Works + /// + /// - Appends one or more **all-zero external padding blocks** after the data blocks: + /// `[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]` + /// - These blocks are **separate from** the internal PKCS#7 padding within blocks + /// - During decoding, scans backwards removing all-zero blocks until finding the data + /// - The original pseudonym value is perfectly preserved + /// + /// ## Parameters + /// + /// - `target_blocks`: The desired number of blocks (must be >= current block count) + /// + /// ## Returns + /// + /// Returns a new `LongPseudonym` padded to the target number of blocks. 
+ /// + /// ## Errors + /// + /// Returns an error if: + /// - The current number of blocks exceeds the target + /// + /// ## Example: Normalizing for Batch Processing + /// + /// ```no_run + /// use libpep::data::long::LongPseudonym; + /// + /// let short_pseudo = LongPseudonym::from_string_padded("user123"); // 1 block + /// let long_pseudo = LongPseudonym::from_string_padded("user@example.com"); // 2 blocks + /// + /// // Normalize both to 2 blocks for unlinkable batch transcryption + /// let short_padded = short_pseudo.pad_to(2).unwrap(); + /// let long_padded = long_pseudo.pad_to(2).unwrap(); + /// + /// // Both now have identical structure (2 blocks) + /// assert_eq!(short_padded.len(), 2); + /// assert_eq!(long_padded.len(), 2); + /// + /// // Original values are preserved when decoded + /// assert_eq!(short_padded.to_string_padded().unwrap(), "user123"); + /// assert_eq!(long_padded.to_string_padded().unwrap(), "user@example.com"); + /// ``` + pub fn pad_to(&self, target_blocks: usize) -> Result<Self, Error> { + let current_blocks = self.0.len(); + + if current_blocks > target_blocks { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Cannot pad: current blocks ({}) exceeds target ({})", + current_blocks, target_blocks + ), + )); + } + + if current_blocks == target_blocks { + return Ok(self.clone()); + } + + // Create external padding blocks (all zeros) + let padding_pattern = create_external_padding_block(); + let padding_block = Pseudonym::from_lizard(&padding_pattern); + + let mut blocks = self.0.clone(); + blocks.resize(target_blocks, padding_block); + + Ok(LongPseudonym(blocks)) + } } impl LongAttribute { @@ -238,6 +327,65 @@ impl LongAttribute { .collect::>() .join("") } + + /// Pads this `LongAttribute` to a target number of blocks. + /// + /// This is useful for batch operations where all attributes must have the same structure. + /// Additional padding blocks are all-zero blocks: + /// `[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]` + /// which are automatically detected and removed during decoding. + /// + /// # Parameters + /// + /// - `target_blocks`: The desired number of blocks (must be >= current block count) + /// + /// # Returns + /// + /// Returns a new `LongAttribute` padded to the target number of blocks.
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The current number of blocks exceeds the target + /// + /// # Example + /// + /// ```no_run + /// use libpep::data::long::LongAttribute; + /// + /// let attr = LongAttribute::from_string_padded("hello"); + /// // Pad to 3 blocks for batch processing + /// let padded = attr.pad_to(3).unwrap(); + /// assert_eq!(padded.len(), 3); + /// // Decoding still returns the original string + /// assert_eq!(padded.to_string_padded().unwrap(), "hello"); + /// ``` + pub fn pad_to(&self, target_blocks: usize) -> Result<Self, Error> { + let current_blocks = self.0.len(); + + if current_blocks > target_blocks { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Cannot pad: current blocks ({}) exceeds target ({})", + current_blocks, target_blocks + ), + )); + } + + if current_blocks == target_blocks { + return Ok(self.clone()); + } + + // Create external padding blocks (all zeros) + let padding_pattern = create_external_padding_block(); + let padding_block = Attribute::from_lizard(&padding_pattern); + + let mut blocks = self.0.clone(); + blocks.resize(target_blocks, padding_block); + + Ok(LongAttribute(blocks)) + } } impl LongEncryptedPseudonym { @@ -846,6 +994,20 @@ impl crate::data::traits::HasStructure for LongEncryptedAttribute { } } +#[cfg(feature = "batch")] +impl BatchEncryptable for LongPseudonym { + fn preprocess_batch(items: &[Self]) -> Result<Vec<Self>, BatchError> { + Ok(items.to_vec()) + } +} + +#[cfg(feature = "batch")] +impl BatchEncryptable for LongAttribute { + fn preprocess_batch(items: &[Self]) -> Result<Vec<Self>, BatchError> { + Ok(items.to_vec()) + } +} + /// Internal helper function to encode bytes with PKCS#7 padding fn from_bytes_padded_impl(data: &[u8]) -> Vec { // Calculate number of full blocks @@ -877,7 +1039,13 @@ fn from_bytes_padded_impl(data: &[u8]) -> Vec { result } -/// Internal helper function to decode padded bytes +/// Internal helper function to decode padded bytes. +/// +/// This function automatically detects and removes external padding blocks +/// created by `pad_to()`, ensuring that normalized values decode correctly. +/// +/// External padding uses all-zero blocks `[0x00, ...]` which are impossible +/// in valid PKCS#7 padding (valid padding bytes are 0x01-0x10).
fn to_bytes_padded_impl(items: &[T]) -> Result, Error> { if items.is_empty() { return Err(Error::new( @@ -886,10 +1054,28 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E )); } - let mut result = Vec::with_capacity(items.len() * 16); + // Scan backwards from the end to remove external padding blocks (all-zero blocks) + // Stop when we find a non-padding block (which will have PKCS#7 padding) + let mut last_data_block_idx = items.len() - 1; + while last_data_block_idx > 0 { + let block = items[last_data_block_idx].to_lizard().ok_or(Error::new( + ErrorKind::InvalidData, + "Encryptable conversion to bytes failed", + ))?; + + if is_external_padding_block(&block) { + // This is external padding, continue scanning backwards + last_data_block_idx -= 1; + } else { + // Found a data block, stop scanning + break; + } + } + + let mut result = Vec::with_capacity((last_data_block_idx + 1) * 16); - // Copy all blocks except the last one - for item in items.iter().take(items.len() - 1) { + // Copy all blocks except the last data block + for item in items.iter().take(last_data_block_idx) { let block = item.to_lizard().ok_or(Error::new( ErrorKind::InvalidData, "Encryptable conversion to bytes failed", @@ -897,10 +1083,8 @@ fn to_bytes_padded_impl(items: &[T]) -> Result, E result.extend_from_slice(&block); } - // Process the last block and validate padding - // Unwrap is safe: we already checked items.is_empty() above - #[allow(clippy::unwrap_used)] - let last_block = items.last().unwrap().to_lizard().ok_or(Error::new( + // Process the last data block and validate PKCS#7 padding + let last_block = items[last_data_block_idx].to_lizard().ok_or(Error::new( ErrorKind::InvalidData, "Last encryptable conversion to bytes failed", ))?; @@ -1236,4 +1420,329 @@ mod tests { assert_eq!(1, deserialized.len()); assert_eq!(long_encrypted[0], deserialized[0]); } + + #[test] + fn long_attribute_null_bytes_in_middle() { + // Test string with null bytes in the middle + let str_with_nulls = "hello\0world"; + let attr = LongAttribute::from_string_padded(str_with_nulls); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_attribute_null_bytes_at_end() { + // Test string ending with null bytes + let str_ending_nulls = "test\0\0"; + let attr = LongAttribute::from_string_padded(str_ending_nulls); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(str_ending_nulls, decoded); + } + + #[test] + fn long_attribute_empty_string() { + // Test empty string + let empty = ""; + let attr = LongAttribute::from_string_padded(empty); + let decoded = attr.to_string_padded().unwrap(); + assert_eq!(empty, decoded); + } + + #[test] + fn long_attribute_strings_ending_with_many_null_bytes() { + // Test various counts of trailing null bytes + for null_count in 1..=20 { + let mut test_str = String::from("test"); + test_str.push_str(&"\0".repeat(null_count)); + + let attr = LongAttribute::from_string_padded(&test_str); + let decoded = attr.to_string_padded().unwrap(); + + assert_eq!(test_str, decoded, "Failed for {} null bytes", null_count); + } + } + + #[test] + fn long_attribute_only_null_bytes() { + // Test strings that are only null bytes + for null_count in 1..=20 { + let test_str = "\0".repeat(null_count); + + let attr = LongAttribute::from_string_padded(&test_str); + let decoded = attr.to_string_padded().unwrap(); + + assert_eq!( + test_str, decoded, + "Failed for string of {} null bytes", + null_count + ); + } + } + + #[test] + fn 
long_attribute_edge_case_15_and_16_null_bytes() { + // 15 null bytes - exactly fits in one block with 1 byte padding + let str_15 = "\0".repeat(15); + let attr_15 = LongAttribute::from_string_padded(&str_15); + let decoded_15 = attr_15.to_string_padded().unwrap(); + assert_eq!(str_15, decoded_15); + + // 16 null bytes - requires 2 blocks (first block full of data, second block entirely PKCS#7 padding) + let str_16 = "\0".repeat(16); + let attr_16 = LongAttribute::from_string_padded(&str_16); + let decoded_16 = attr_16.to_string_padded().unwrap(); + assert_eq!(str_16, decoded_16); + + // 17 null bytes + let str_17 = "\0".repeat(17); + let attr_17 = LongAttribute::from_string_padded(&str_17); + let decoded_17 = attr_17.to_string_padded().unwrap(); + assert_eq!(str_17, decoded_17); + } + + #[test] + fn long_attribute_pad_to_with_null_bytes() { + // Create a string with null bytes + let str_with_nulls = "data\0\0end"; + let attr = LongAttribute::from_string_padded(str_with_nulls); + + // Pad to more blocks + let padded = attr.pad_to(3).unwrap(); + + // Should preserve the null bytes in the original string + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_attribute_pad_to_only_null_bytes() { + // Test strings that are only null bytes, then padded + for null_count in 1..=10 { + let test_str = "\0".repeat(null_count); + + let attr = LongAttribute::from_string_padded(&test_str); + let padded = attr.pad_to(5).unwrap(); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!( + test_str, decoded, + "Failed for padded string of {} null bytes", + null_count + ); + } + } + + #[test] + fn long_attribute_pad_to_empty_string() { + // Test empty string with padding + let empty = ""; + let attr = LongAttribute::from_string_padded(empty); + let padded = attr.pad_to(2).unwrap(); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(empty, decoded); + } + + #[test] + fn long_pseudonym_null_bytes_roundtrip() { + // Test pseudonym with null bytes + let str_with_nulls = "user\0\0id"; + let pseudo = LongPseudonym::from_string_padded(str_with_nulls); + let decoded = pseudo.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_pseudonym_pad_to_with_null_bytes() { + // Test pseudonym with null bytes after padding + let str_with_nulls = "id\0\0x"; + let pseudo = LongPseudonym::from_string_padded(str_with_nulls); + let padded = pseudo.pad_to(3).unwrap(); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(str_with_nulls, decoded); + } + + #[test] + fn long_attribute_data_ending_with_full_0x10_block() { + // Regression test for external padding detection: + // Plaintext containing a full block of 0x10 bytes should roundtrip correctly. + // After PKCS#7 encoding, this becomes [0x10×16][0x10×16] (data block + padding block). + // The decoder must correctly identify the second block as legitimate PKCS#7 padding, + // not external padding added by pad_to(). + let data = vec![0x10u8; 16]; + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Data ending with full 0x10 block should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_ascending_sequence_data() { + // Regression test: ensure data containing ascending sequence [0,1,2,...,15] + // can be encoded and decoded correctly even though it looks like a pattern.
+ let data: Vec<u8> = (0..16).collect(); + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Ascending sequence data should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_pad_to_preserves_data() { + // Test that pad_to correctly preserves data when adding external padding + let attr = LongAttribute::from_string_padded("hello"); + let original_len = attr.len(); + + let padded = attr.pad_to(original_len + 2).unwrap(); + assert_eq!( + padded.len(), + original_len + 2, + "Padded length should match target" + ); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!(decoded, "hello", "pad_to should preserve original data"); + } + + #[test] + fn long_pseudonym_pad_to_preserves_data() { + // Test that pad_to correctly preserves data for pseudonyms + let pseudo = LongPseudonym::from_string_padded("test-user-id"); + let original_len = pseudo.len(); + + let padded = pseudo.pad_to(original_len + 3).unwrap(); + assert_eq!( + padded.len(), + original_len + 3, + "Padded length should match target" + ); + + let decoded = padded.to_string_padded().unwrap(); + assert_eq!( + decoded, "test-user-id", + "pad_to should preserve original data" + ); + } + + #[test] + fn long_attribute_data_containing_magic_marker_multiblock() { + // Edge case: data containing the bytes [0xFF, 0xEE, 0xDD, 0xCC] can be encoded + // without restriction - external padding is all zeros, so no marker bytes are reserved. + let data = vec![ + 0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00, + 0xAA, 0xBB, 0xCC, + ]; + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Multi-block data with any bytes should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_single_block_with_magic_marker() { + // Edge case: Data starting with [0xFF, 0xEE, 0xDD, 0xCC] works fine. + // After PKCS#7, the last byte will be 0x08 (padding), not 0x00. + // External padding is all zeros, so this won't be confused. + let data = vec![0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66]; + let attr = LongAttribute::from_bytes_padded(&data); + + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!( + data, decoded, + "Single-block data with any bytes should roundtrip correctly" + ); + } + + #[test] + fn long_attribute_data_exactly_matching_external_padding_pattern() { + // Edge case: Data that is all zeros. + // After PKCS#7 encoding, the last byte will be a padding value (0x01-0x10), not 0x00. + let data = vec![ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, + ]; + let attr = LongAttribute::from_bytes_padded(&data); + + // After PKCS#7: will be 1 block with last byte = 0x01 + // Not all zeros, so won't be confused with external padding + assert_eq!(attr.len(), 1); + + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!(data, decoded, "All-zero data should roundtrip correctly"); + } + + #[test] + fn long_attribute_double_pad_to_works() { + // With all-zero external padding, calling pad_to() multiple times works correctly. + // The decoder scans backwards removing all zero blocks until it finds the data block.
+ let attr = LongAttribute::from_string_padded("test"); + let padded_once = attr.pad_to(2).unwrap(); + let padded_twice = padded_once.pad_to(3).unwrap(); + + // This should succeed - all zero blocks are removed, leaving just the data + let result = padded_twice.to_string_padded(); + assert!( + result.is_ok(), + "Double pad_to should succeed with all-zero padding" + ); + assert_eq!(result.unwrap(), "test"); + } + + #[test] + fn verify_no_ambiguous_edge_cases() { + // Comprehensive verification that ALL data can be encoded without ambiguity + // External padding is all-zero blocks, and PKCS#7 ensures the last byte is never 0x00. + + // Test 1: Arbitrary data + let data1 = vec![0xFF, 0xEE, 0xDD, 0xCC, 0x99, 0x88, 0x77, 0x66]; + let attr1 = LongAttribute::from_bytes_padded(&data1); + let decoded1 = attr1.to_bytes_padded().unwrap(); + assert_eq!(data1, decoded1, "Arbitrary data should work"); + + // Test 2: All-zero data (PKCS#7 adds non-zero padding) + let data2 = vec![ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, + ]; + let attr2 = LongAttribute::from_bytes_padded(&data2); + let decoded2 = attr2.to_bytes_padded().unwrap(); + assert_eq!(data2, decoded2, "All-zero data should work"); + + // Test 3: Mixed zeros and non-zeros + let data3 = vec![ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x11, 0x12, + 0x13, 0x14, 0x15, 0x16, + ]; + let attr3 = LongAttribute::from_bytes_padded(&data3); + let decoded3 = attr3.to_bytes_padded().unwrap(); + assert_eq!(data3, decoded3, "Mixed data should work"); + + // Test 4: pad_to() works correctly + let attr4 = LongAttribute::from_string_padded("hello"); + let padded = attr4.pad_to(3).unwrap(); + assert_eq!( + padded.len(), + 3, + "pad_to should create correct number of blocks" + ); + let decoded4 = padded.to_string_padded().unwrap(); + assert_eq!("hello", decoded4, "pad_to should preserve original data"); + + // Test 5: Various lengths with different byte patterns + for len in 1..=32 { + let mut data = vec![0x00; len]; // All zeros + data[0] = 0xFF; // Make first byte non-zero + + let attr = LongAttribute::from_bytes_padded(&data); + let decoded = attr.to_bytes_padded().unwrap(); + assert_eq!(data, decoded, "Data of length {} should work", len); + } + } } diff --git a/src/lib/data/padding/external.rs b/src/lib/data/padding/external.rs new file mode 100644 index 0000000..c37b8f6 --- /dev/null +++ b/src/lib/data/padding/external.rs @@ -0,0 +1,135 @@ +//! External padding for batch unlinkability. +//! +//! This module provides functions to create and detect external padding blocks used by the +//! `pad_to()` method on long data types for batch unlinkability. +//! +//! # Purpose +//! +//! External padding normalizes different-sized values to identical structure for unlinkable batch transcryption. +//! +//! # When Used +//! +//! - Explicitly via the `pad_to(n)` method on long types +//! - Only for multi-block data (see [`long`](crate::data::long) module) +//! - Required when batch processing needs unlinkability guarantees +//! +//! # How It Works +//! +//! - Adds full 16-byte all-zero blocks after the data +//! - Format: `[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]` +//! - During decoding, scans backwards from the end removing all-zero blocks until a data block is found +//! - Automatically detected and removed during decoding +//! +//! # Order of Operations +//! +//! **Encoding:** +//! 1. 
PKCS#7 padding is applied first to the last data block +//! 2. All-zero external padding blocks are added after +//! +//! **Decoding:** +//! 1. Scan backwards from the end, removing all-zero blocks +//! 2. Stop when a non-zero block is found (the last data block with PKCS#7 padding) +//! 3. Remove PKCS#7 padding from the last data block +//! +//! This ordering ensures that even if data is all zeros, PKCS#7 padding will change the last +//! byte to `0x01`-`0x10`, guaranteeing it won't be detected as an external padding block. +//! +//! # Example +//! +//! ```text +//! After pad_to(3): +//! Block 1: [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] ← data with PKCS#7 +//! Block 2: [0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00] ← padding +//! Block 3: [0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00] ← padding +//! ``` +//! +//! # Disambiguation Guarantee +//! +//! External padding blocks are **all zeros**, while PKCS#7 padded blocks **never** have `0x00` +//! in the last byte (valid PKCS#7 padding: `0x01`-`0x10`). +//! +//! Because PKCS#7 padding is applied first during encoding, it **always** changes the last +//! byte of any data block to `0x01`-`0x10`. This deterministically prevents any data from being +//! mistaken for an external padding block. +//! +//! This means **ALL possible byte sequences can be encoded without ambiguity**, including values +//! that are all zeros (PKCS#7 changes the last byte) + +/// Creates an external padding block. +/// +/// Format: [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] +/// +/// All-zero blocks are impossible for PKCS#7 padded data blocks (valid padding bytes are 0x01-0x10), +/// making this unambiguous. +pub(crate) fn create_external_padding_block() -> [u8; 16] { + [0u8; 16] +} + +/// Checks if a block is an external padding block. +/// +/// Returns `true` if this block is all zeros (external padding), +/// or `false` if this is a regular data block. 
+/// +/// # Disambiguation Guarantee +/// +/// - External padding blocks are **all zeros** +/// - PKCS#7 padded data blocks **never** have `0x00` in the last byte (valid padding: `0x01`-`0x10`) +/// +/// This means **ALL possible byte sequences can be encoded without ambiguity**, including: +/// - Data blocks that are all zeros except the last byte (PKCS#7 will set last byte to 0x01-0x10) +/// - Any combination of bytes whatsoever +pub(crate) fn is_external_padding_block(block: &[u8]) -> bool { + if block.len() != 16 { + return false; + } + + // Check if the entire block is all zeros + // This guarantees disambiguation from data blocks because PKCS#7 padding + // always changes the last byte to 0x01-0x10, never 0x00 + block.iter().all(|&b| b == 0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_external_padding_block() { + let block = create_external_padding_block(); + + // Check that the entire block is all zeros + assert_eq!(&block, &[0u8; 16]); + } + + #[test] + fn test_is_external_padding_block_valid() { + let block = create_external_padding_block(); + assert!(is_external_padding_block(&block)); + } + + #[test] + fn test_is_external_padding_block_invalid_length() { + let block = [0x00; 4]; + assert!(!is_external_padding_block(&block)); + } + + #[test] + fn test_is_external_padding_block_not_all_zeros() { + let mut block = [0x00; 16]; + block[0] = 0xFF; // Not all zeros + assert!(!is_external_padding_block(&block)); + } + + #[test] + fn test_disambiguation_pkcs7_never_all_zeros() { + // PKCS#7 valid padding bytes are 0x01-0x10 + // External padding is all zeros + // This guarantees no ambiguity + + for padding_value in 0x01..=0x10u8 { + let pkcs7_block = [padding_value; 16]; + // PKCS#7 block is not all zeros + assert!(!is_external_padding_block(&pkcs7_block)); + } + } +} diff --git a/src/lib/data/padding.rs b/src/lib/data/padding/internal.rs similarity index 78% rename from src/lib/data/padding.rs rename to src/lib/data/padding/internal.rs index a3d4b0f..9a3f1f1 100644 --- a/src/lib/data/padding.rs +++ b/src/lib/data/padding/internal.rs @@ -1,12 +1,75 @@ -//! PKCS#7 padding support for single-block (16 byte) encoding. +//! Internal PKCS#7 padding for single-block (16 byte) encoding. //! -//! This module provides the `Padded` trait for encoding data up to 15 bytes using PKCS#7 padding. -//! For multi-block data, see the `long` module. +//! This module provides the [`Padded`] trait for encoding data up to 15 bytes using PKCS#7 padding. +//! +//! # Purpose +//! +//! PKCS#7 padding ensures data fills complete 16-byte blocks during encryption. +//! +//! # When Used +//! +//! - Automatically applied during encoding/decoding +//! - For single-block data (up to 15 bytes) via the [`Padded`] trait +//! - For multi-block data within the last block (see [`long`](crate::data::long) module) +//! +//! # How It Works +//! +//! - The padding byte value indicates the number of padding bytes +//! - Valid padding bytes are `0x01`-`0x10` +//! - Always applied, even if data is exactly a multiple of 16 bytes +//! +//! # Example +//! +//! ```text +//! "hello" (5 bytes): +//! [h e l l o | 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B 0x0B] +//! └─ data ─┘ └──────────────── 11 padding bytes ────────────────────┘ +//! ``` +//! +//! # Order of Operations +//! +//! PKCS#7 padding is always applied **before** external padding (see [`external`](crate::data::padding::external)) +//! and removed **after** external padding is removed during decoding. This ordering is critical for +//! 
disambiguation - even if data matches the external padding pattern, PKCS#7 will change the last +//! byte to `0x01`-`0x10`, guaranteeing correct detection. +//! +//! # Disambiguation Guarantee +//! +//! PKCS#7 padding uses bytes `0x01`-`0x10`, so the last byte is **never** `0x00`. +//! This makes it completely unambiguous from external padding blocks, which are **all zeros**. +//! +//! This means **ALL possible byte sequences can be encoded without ambiguity**. use crate::data::simple::{Attribute, ElGamalEncryptable, Pseudonym}; use std::io::{Error, ErrorKind}; /// A trait for encryptable types that support PKCS#7 padding for single-block (16 byte) encoding. +/// +/// This trait provides methods to encode data up to 15 bytes using PKCS#7 padding, +/// which fills the remaining bytes of a 16-byte block with padding bytes. +/// +/// # Padding Format +/// +/// - For `n` bytes of data (0 ≤ n ≤ 15), add `16 - n` padding bytes +/// - Each padding byte has the value `16 - n` +/// - This allows unambiguous removal of padding during decoding +/// +/// # Examples +/// +/// ```ignore +/// use libpep::data::padding::Padded; +/// use libpep::data::simple::Attribute; +/// +/// // Encode a string +/// let attr = Attribute::from_string_padded("hello")?; +/// let decoded = attr.to_string_padded()?; +/// assert_eq!(decoded, "hello"); +/// +/// // Encode bytes +/// let attr = Attribute::from_bytes_padded(b"data")?; +/// let decoded = attr.to_bytes_padded()?; +/// assert_eq!(decoded, b"data"); +/// ``` pub trait Padded: ElGamalEncryptable { /// Encodes an arbitrary byte array using PKCS#7 padding. /// diff --git a/src/lib/data/padding/mod.rs b/src/lib/data/padding/mod.rs new file mode 100644 index 0000000..f5fff63 --- /dev/null +++ b/src/lib/data/padding/mod.rs @@ -0,0 +1,16 @@ +//! Padding mechanisms for PEP data encoding. +//! +//! This module provides two distinct types of padding for PEP data: +//! +//! - **Internal Padding (PKCS#7)**: See the [`internal`] module for PKCS#7 padding used for single-block encoding. +//! - **External Padding**: See the [`external`] module for padding blocks used for batch unlinkability. +//! +//! Both padding types are completely unambiguous and can encode any possible byte sequence. + +pub mod internal; + +#[cfg(feature = "long")] +pub mod external; + +// Re-export the Padded trait for convenience +pub use internal::Padded; diff --git a/src/lib/data/py/json.rs b/src/lib/data/py/json.rs index 0222016..d1e1f8c 100644 --- a/src/lib/data/py/json.rs +++ b/src/lib/data/py/json.rs @@ -64,6 +64,39 @@ impl PyPEPJSONValue { .map_err(|e| PyValueError::new_err(format!("Conversion failed: {}", e)))?; Python::attach(|py| json_to_python(py, &json_value)) } + + /// Get the structure/shape of this PEPJSONValue. + /// + /// Returns: + /// A JSONStructure describing the shape + #[pyo3(name = "structure")] + fn structure(&self) -> PyJSONStructure { + PyJSONStructure(self.0.structure()) + } + + /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// LongString and LongPseudonym variants to ensure all instances have the same + /// number of blocks when encrypted. This is necessary for batch transcryption where + /// all values must have identical structure. 
+ /// + /// Args: + /// structure: The target structure specifying the number of blocks for each field + /// + /// Returns: + /// A padded PEPJSONValue with padding blocks added where necessary + /// + /// Raises: + /// ValueError: If the current structure doesn't match the target structure type + /// or if the current size exceeds the target size + #[pyo3(name = "pad_to")] + fn pad_to(&self, structure: &PyJSONStructure) -> PyResult { + self.0 + .pad_to(&structure.0) + .map(Self) + .map_err(|e| PyValueError::new_err(format!("Padding failed: {}", e))) + } } /// An encrypted PEP JSON value. @@ -455,6 +488,29 @@ pub fn py_bytes_to_number(bytes: [u8; 9]) -> f64 { num.as_f64().unwrap_or(0.0) } +/// Unifies multiple JSON structures by taking the maximum block count for each field. +/// +/// This function is useful for batch operations where you need to normalize multiple +/// values to have the same structure. It recursively unifies nested structures, +/// taking the maximum block count for strings and pseudonyms. +/// +/// Args: +/// structures: A list of JSONStructure objects to unify +/// +/// Returns: +/// A unified JSONStructure where string and pseudonym fields have maximum block counts +/// +/// Raises: +/// ValueError: If the structures are incompatible (different types, array lengths, or object fields) +#[pyfunction] +#[pyo3(name = "unify_structures")] +pub fn py_unify_structures(structures: Vec) -> PyResult { + let rust_structures: Vec = structures.into_iter().map(|s| s.0).collect(); + crate::data::json::structure::unify_structures(&rust_structures) + .map(PyJSONStructure) + .map_err(|e| PyValueError::new_err(format!("Unification failed: {}", e))) +} + pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { // Register main JSON types at json module level m.add_class::()?; @@ -480,6 +536,7 @@ pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(py_byte_to_bool, m)?)?; m.add_function(wrap_pyfunction!(py_number_to_bytes, m)?)?; m.add_function(wrap_pyfunction!(py_bytes_to_number, m)?)?; + m.add_function(wrap_pyfunction!(py_unify_structures, m)?)?; Ok(()) } diff --git a/src/lib/data/py/long.rs b/src/lib/data/py/long.rs index 181b040..49dadb1 100644 --- a/src/lib/data/py/long.rs +++ b/src/lib/data/py/long.rs @@ -69,6 +69,28 @@ impl PyLongPseudonym { Ok(PyBytes::new(py, &result).into()) } + /// Pads this LongPseudonym to a target number of blocks for batch unlinkability. + /// + /// In batch transcryption, all values must have identical structure to prevent + /// linkability attacks. This method adds external padding blocks to normalize + /// different-sized pseudonyms to the same structure. + /// + /// Args: + /// target_blocks: The desired number of blocks (must be >= current block count) + /// + /// Returns: + /// A new LongPseudonym padded to the target number of blocks + /// + /// Raises: + /// ValueError: If the current number of blocks exceeds the target + #[pyo3(name = "pad_to")] + fn pad_to(&self, target_blocks: usize) -> PyResult { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Padding failed: {e}"))) + } + /// Get the underlying pseudonyms. #[pyo3(name = "pseudonyms")] fn pseudonyms(&self) -> Vec { @@ -139,6 +161,27 @@ impl PyLongAttribute { Ok(PyBytes::new(py, &result).into()) } + /// Pads this LongAttribute to a target number of blocks for batch operations. + /// + /// This is useful for batch operations where all attributes must have the same structure. 
+ /// The padding blocks are automatically detected and skipped during decoding. + /// + /// Args: + /// target_blocks: The desired number of blocks (must be >= current block count) + /// + /// Returns: + /// A new LongAttribute padded to the target number of blocks + /// + /// Raises: + /// ValueError: If the current number of blocks exceeds the target + #[pyo3(name = "pad_to")] + fn pad_to(&self, target_blocks: usize) -> PyResult { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Padding failed: {e}"))) + } + /// Get the underlying attributes. #[pyo3(name = "attributes")] fn attributes(&self) -> Vec { diff --git a/src/lib/data/py/records.rs b/src/lib/data/py/records.rs index e6b3311..50af282 100644 --- a/src/lib/data/py/records.rs +++ b/src/lib/data/py/records.rs @@ -10,7 +10,7 @@ use crate::data::py::simple::{ }; use crate::data::records::{EncryptedRecord, Record}; #[cfg(feature = "long")] -use crate::data::records::{LongEncryptedRecord, LongRecord}; +use crate::data::records::{LongEncryptedRecord, LongRecord, LongRecordStructure}; use crate::keys::py::PySessionKeys; use crate::keys::types::SessionKeys; use pyo3::prelude::*; @@ -177,6 +177,39 @@ impl PyLongRecord { self.0.attributes.len() ) } + + /// Get the structure of this LongRecord. + /// + /// Returns: + /// A LongRecordStructure describing the number of blocks in each pseudonym and attribute + #[pyo3(name = "structure")] + fn structure(&self) -> PyLongRecordStructure { + PyLongRecordStructure(self.0.structure()) + } + + /// Pads this LongRecord to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// each pseudonym and attribute to ensure all records have the same structure. + /// This is necessary for batch transcryption where all values must have identical + /// structure to prevent linkability attacks. + /// + /// Args: + /// structure: The target structure specifying the number of blocks for each field + /// + /// Returns: + /// A padded LongRecord with padding blocks added where necessary + /// + /// Raises: + /// ValueError: If the number of pseudonyms/attributes doesn't match the structure + /// or if any field exceeds its target size + #[pyo3(name = "pad_to")] + fn pad_to(&self, structure: &PyLongRecordStructure) -> PyResult { + self.0 + .pad_to(&structure.0) + .map(Self) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("Padding failed: {e}"))) + } } #[cfg(feature = "long")] @@ -268,6 +301,48 @@ pub fn py_decrypt_long_record( PyLongRecord(decrypt(&encrypted.0, &keys)) } +#[cfg(feature = "long")] +/// Structure descriptor for LongRecords - describes the shape including block counts. +#[pyclass(name = "LongRecordStructure", from_py_object)] +#[derive(Clone)] +pub struct PyLongRecordStructure(pub(crate) LongRecordStructure); + +#[cfg(feature = "long")] +#[pymethods] +impl PyLongRecordStructure { + /// Create a new LongRecordStructure with block counts for pseudonyms and attributes. + /// + /// Args: + /// pseudonym_blocks: List of block counts for each pseudonym + /// attribute_blocks: List of block counts for each attribute + #[new] + pub fn new(pseudonym_blocks: Vec, attribute_blocks: Vec) -> Self { + PyLongRecordStructure(LongRecordStructure { + pseudonym_blocks, + attribute_blocks, + }) + } + + /// Get the block counts for pseudonyms. 
+ #[getter] + pub fn pseudonym_blocks(&self) -> Vec { + self.0.pseudonym_blocks.clone() + } + + /// Get the block counts for attributes. + #[getter] + pub fn attribute_blocks(&self) -> Vec { + self.0.attribute_blocks.clone() + } + + fn __repr__(&self) -> String { + format!( + "LongRecordStructure(pseudonym_blocks={:?}, attribute_blocks={:?})", + self.0.pseudonym_blocks, self.0.attribute_blocks + ) + } +} + pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { // Register Record types m.add_class::()?; @@ -278,6 +353,7 @@ pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { { m.add_class::()?; m.add_class::()?; + m.add_class::()?; } Ok(()) diff --git a/src/lib/data/records.rs b/src/lib/data/records.rs index b4e5bbd..d01c845 100644 --- a/src/lib/data/records.rs +++ b/src/lib/data/records.rs @@ -6,7 +6,7 @@ use crate::data::simple::{ Attribute, ElGamalEncrypted, EncryptedAttribute, EncryptedPseudonym, Pseudonym, }; -use crate::data::traits::{Encryptable, Encrypted, Transcryptable}; +use crate::data::traits::{BatchEncryptable, Encryptable, Encrypted, Transcryptable}; use crate::factors::TranscryptionInfo; #[cfg(feature = "offline")] use crate::keys::GlobalPublicKeys; @@ -23,6 +23,7 @@ use crate::data::long::{ #[cfg(feature = "batch")] use crate::data::traits::HasStructure; +use crate::transcryptor::BatchError; /// Structure descriptor for Records - describes the shape without the data. #[derive(Debug, Clone, PartialEq, Eq)] @@ -185,6 +186,84 @@ impl LongRecord { attributes, } } + + /// Pads this LongRecord to match a target structure by adding external padding blocks. + /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// each pseudonym and attribute to ensure all records have the same structure. + /// This is necessary for batch transcryption where all values must have identical + /// structure to prevent linkability attacks. + /// + /// # Arguments + /// + /// * `structure` - The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// A padded LongRecord with padding blocks added where necessary + /// + /// # Errors + /// + /// Returns an error if: + /// - The number of pseudonyms doesn't match the structure + /// - The number of attributes doesn't match the structure + /// - Any pseudonym or attribute exceeds its target size + pub fn pad_to(&self, structure: &LongRecordStructure) -> Result { + // Validate counts + if self.pseudonyms.len() != structure.pseudonym_blocks.len() { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Pseudonym count mismatch: record has {} but structure expects {}", + self.pseudonyms.len(), + structure.pseudonym_blocks.len() + ), + )); + } + + if self.attributes.len() != structure.attribute_blocks.len() { + return Err(Error::new( + ErrorKind::InvalidInput, + format!( + "Attribute count mismatch: record has {} but structure expects {}", + self.attributes.len(), + structure.attribute_blocks.len() + ), + )); + } + + // Pad pseudonyms + let padded_pseudonyms: Vec<_> = self + .pseudonyms + .iter() + .zip(structure.pseudonym_blocks.iter()) + .map(|(p, &target_blocks)| p.pad_to(target_blocks)) + .collect::>()?; + + // Pad attributes + let padded_attributes: Vec<_> = self + .attributes + .iter() + .zip(structure.attribute_blocks.iter()) + .map(|(a, &target_blocks)| a.pad_to(target_blocks)) + .collect::>()?; + + Ok(LongRecord { + pseudonyms: padded_pseudonyms, + attributes: padded_attributes, + }) + } + + /// Get the structure of this LongRecord. 
+ /// + /// Returns a `LongRecordStructure` describing the number of blocks in each + /// pseudonym and attribute. + pub fn structure(&self) -> LongRecordStructure { + LongRecordStructure { + pseudonym_blocks: self.pseudonyms.iter().map(|p| p.0.len()).collect(), + attribute_blocks: self.attributes.iter().map(|a| a.0.len()).collect(), + } + } } #[cfg(feature = "long")] @@ -733,3 +812,17 @@ impl HasStructure for LongEncryptedRecord { } } } + +#[cfg(feature = "batch")] +impl BatchEncryptable for Record { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + +#[cfg(feature = "batch")] +impl BatchEncryptable for LongRecord { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} diff --git a/src/lib/data/simple.rs b/src/lib/data/simple.rs index 806758d..4f1ac93 100644 --- a/src/lib/data/simple.rs +++ b/src/lib/data/simple.rs @@ -4,12 +4,15 @@ use crate::arithmetic::group_elements::GroupElement; use crate::arithmetic::scalars::ScalarNonZero; use crate::core::elgamal::{ElGamal, ELGAMAL_LENGTH}; -use crate::data::traits::{Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable}; +use crate::data::traits::{ + BatchEncryptable, Encryptable, Encrypted, Pseudonymizable, Rekeyable, Transcryptable, +}; use crate::factors::TranscryptionInfo; use crate::factors::{ AttributeRekeyInfo, PseudonymRekeyInfo, PseudonymizationInfo, RerandomizeFactor, }; use crate::keys::*; +use crate::transcryptor::BatchError; use derive_more::{Deref, From}; use rand_core::{CryptoRng, Rng}; #[cfg(feature = "serde")] @@ -509,6 +512,20 @@ impl crate::data::traits::HasStructure for EncryptedAttribute { fn structure(&self) -> Self::Structure {} } +#[cfg(feature = "batch")] +impl BatchEncryptable for Pseudonym { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + +#[cfg(feature = "batch")] +impl BatchEncryptable for Attribute { + fn preprocess_batch(items: &[Self]) -> Result, BatchError> { + Ok(items.to_vec()) + } +} + #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { diff --git a/src/lib/data/traits.rs b/src/lib/data/traits.rs index 19fe557..c6d28a2 100644 --- a/src/lib/data/traits.rs +++ b/src/lib/data/traits.rs @@ -162,3 +162,10 @@ pub trait HasStructure { /// Get the structure of this encrypted value. fn structure(&self) -> Self::Structure; } + +#[cfg(feature = "batch")] +pub trait BatchEncryptable: Encryptable + Clone { + fn preprocess_batch( + items: &[Self], + ) -> Result, crate::transcryptor::batch::BatchError>; +} diff --git a/src/lib/data/wasm/json.rs b/src/lib/data/wasm/json.rs index 5579bd3..3007be6 100644 --- a/src/lib/data/wasm/json.rs +++ b/src/lib/data/wasm/json.rs @@ -1,6 +1,6 @@ //! WASM bindings for PEP JSON encryption. -use crate::client::{decrypt, encrypt}; +use crate::client::{decrypt, decrypt_batch, encrypt, encrypt_batch}; #[cfg(all(feature = "offline", feature = "insecure"))] use crate::client::{decrypt_global, encrypt_global}; use crate::data::json::builder::PEPJSONBuilder; @@ -66,6 +66,43 @@ impl WASMPEPJSONValue { serde_wasm_bindgen::to_value(&json_value) .map_err(|e| JsValue::from_str(&format!("Failed to convert to JS: {}", e))) } + + /// Get the structure/shape of this PEPJSONValue. + /// + /// # Returns + /// + /// A JSONStructure describing the shape + #[wasm_bindgen] + pub fn structure(&self) -> WASMJSONStructure { + WASMJSONStructure(self.0.structure()) + } + + /// Pads this PEPJSONValue to match a target structure by adding external padding blocks. 
+ /// + /// This method adds external padding blocks (separate from PKCS#7 padding) to + /// LongString and LongPseudonym variants to ensure all instances have the same + /// number of blocks when encrypted. This is necessary for batch transcryption where + /// all values must have identical structure. + /// + /// # Arguments + /// + /// * `structure` - The target structure specifying the number of blocks for each field + /// + /// # Returns + /// + /// A padded PEPJSONValue with padding blocks added where necessary + /// + /// # Errors + /// + /// Throws an error if the current structure doesn't match the target structure type + /// or if the current size exceeds the target size + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, structure: &WASMJSONStructure) -> Result { + self.0 + .pad_to(&structure.0) + .map(Self) + .map_err(|e| JsValue::from_str(&format!("Padding failed: {}", e))) + } } /// An encrypted PEP JSON value. @@ -289,6 +326,32 @@ pub fn wasm_encrypt_json( WASMEncryptedPEPJSONValue(encrypted) } +/// Batch encrypt a list of PEPJSONValues using session keys. +/// All values must have the same structure, and the resulting encrypted values will be padded to match the maximum block counts for that structure. +/// +/// # Arguments +/// * `values` - Array of PEPJSONValue objects to encrypt +/// * `session_keys` - Session keys containing public and secret keys for both pseudonyms and +/// +/// # Returns +/// An array of EncryptedPEPJSONValue objects, all with the same structure and padded to match the maximum block counts for that structure +#[cfg(feature = "batch")] +#[wasm_bindgen(js_name = encryptJsonBatch)] +pub fn wasm_encrypt_json_batch( + values: Vec, + session_keys: &WASMSessionKeys, +) -> Result, JsValue> { + let mut rng = rand::rng(); + let keys: SessionKeys = (*session_keys).into(); + let rust_values: Vec = values.into_iter().map(|v| v.0).collect(); + let encrypted = encrypt_batch(&rust_values, &keys, &mut rng) + .map_err(|e| JsValue::from_str(&format!("{}", e)))?; + + Ok(encrypted + .into_iter() + .map(WASMEncryptedPEPJSONValue) + .collect()) +} /// Decrypt an EncryptedPEPJSONValue using session keys. /// /// # Arguments @@ -313,6 +376,31 @@ pub fn wasm_decrypt_json( Ok(WASMPEPJSONValue(decrypted)) } +/// Decrypt a batch of EncryptedPEPJSONValues using session keys. +/// +/// # Arguments +/// +/// * `encrypted` - Array of EncryptedPEPJSONValue objects to decrypt +/// * `session_keys` - Session keys containing public and secret keys for both pseudonyms and +/// +/// # Returns +/// An array of PEPJSONValue objects +/// # Errors +/// Returns an error if any value fails to decrypt +#[cfg(feature = "batch")] +#[wasm_bindgen(js_name = decryptJsonBatch)] +pub fn wasm_decrypt_json_batch( + encrypted: Vec, + session_keys: &WASMSessionKeys, +) -> Result, JsValue> { + let keys: SessionKeys = (*session_keys).into(); + let rust_encrypted: Vec = encrypted.into_iter().map(|v| v.0).collect(); + let decrypted = + decrypt_batch(&rust_encrypted, &keys).map_err(|e| JsValue::from_str(&format!("{}", e)))?; + + Ok(decrypted.into_iter().map(WASMPEPJSONValue).collect()) +} + /// Transcrypt a batch of EncryptedPEPJSONValues using a TranscryptionInfo object. /// /// # Arguments @@ -415,3 +503,30 @@ pub fn wasm_bytes_to_number(bytes: Vec) -> Result { let num = utils::bytes_to_number(&arr); Ok(num.as_f64().unwrap_or(0.0)) } + +/// Unifies multiple JSON structures by taking the maximum block count for each field. 
+/// +/// This function is useful for batch operations where you need to normalize multiple +/// values to have the same structure. It recursively unifies nested structures, +/// taking the maximum block count for strings and pseudonyms. +/// +/// # Arguments +/// +/// * `structures` - An array of JSONStructure objects to unify +/// +/// # Returns +/// +/// A unified JSONStructure where string and pseudonym fields have maximum block counts +/// +/// # Errors +/// +/// Returns an error if the structures are incompatible (different types, array lengths, or object fields) +#[wasm_bindgen(js_name = unifyStructures)] +pub fn wasm_unify_structures( + structures: Vec, +) -> Result { + let rust_structures: Vec = structures.into_iter().map(|s| s.0).collect(); + crate::data::json::structure::unify_structures(&rust_structures) + .map(WASMJSONStructure) + .map_err(|e| JsValue::from_str(&format!("Unification failed: {}", e))) +} diff --git a/src/lib/data/wasm/long.rs b/src/lib/data/wasm/long.rs index 7da1f7d..da8393c 100644 --- a/src/lib/data/wasm/long.rs +++ b/src/lib/data/wasm/long.rs @@ -50,6 +50,31 @@ impl WASMLongPseudonym { .map_err(|e| JsError::new(&format!("Decoding failed: {e}"))) } + /// Pads this LongPseudonym to a target number of blocks for batch unlinkability. + /// + /// In batch transcryption, all values must have identical structure to prevent + /// linkability attacks. This method adds external padding blocks to normalize + /// different-sized pseudonyms to the same structure. + /// + /// # Arguments + /// + /// * `targetBlocks` - The desired number of blocks (must be >= current block count) + /// + /// # Returns + /// + /// A new LongPseudonym padded to the target number of blocks + /// + /// # Errors + /// + /// Throws an error if the current number of blocks exceeds the target + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, target_blocks: usize) -> Result { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| JsError::new(&format!("Padding failed: {e}"))) + } + /// Get the underlying pseudonyms. #[wasm_bindgen(getter)] pub fn pseudonyms(&self) -> Vec { @@ -111,6 +136,30 @@ impl WASMLongAttribute { .map_err(|e| JsError::new(&format!("Decoding failed: {e}"))) } + /// Pads this LongAttribute to a target number of blocks for batch operations. + /// + /// This is useful for batch operations where all attributes must have the same structure. + /// The padding blocks are automatically detected and skipped during decoding. + /// + /// # Arguments + /// + /// * `targetBlocks` - The desired number of blocks (must be >= current block count) + /// + /// # Returns + /// + /// A new LongAttribute padded to the target number of blocks + /// + /// # Errors + /// + /// Throws an error if the current number of blocks exceeds the target + #[wasm_bindgen(js_name = padTo)] + pub fn pad_to(&self, target_blocks: usize) -> Result { + self.0 + .pad_to(target_blocks) + .map(Self) + .map_err(|e| JsError::new(&format!("Padding failed: {e}"))) + } + /// Get the underlying attributes. #[wasm_bindgen(getter)] pub fn attributes(&self) -> Vec { @@ -235,9 +284,13 @@ impl WASMLongEncryptedAttribute { } /// WASM bindings for batch operations on long (multi-block) data types. 
+#[cfg(feature = "batch")]
 use crate::data::records::LongEncryptedRecord;
+#[cfg(feature = "batch")]
 use crate::factors::wasm::contexts::WASMTranscryptionInfo;
+#[cfg(feature = "batch")]
 use crate::factors::wasm::types::WASMPseudonymRekeyFactor;
+#[cfg(feature = "batch")]
 use crate::factors::TranscryptionInfo;
 #[cfg(feature = "batch")]
 use crate::transcryptor::{rekey_batch, transcrypt_batch};
diff --git a/src/lib/data/wasm/records.rs b/src/lib/data/wasm/records.rs
index 1ba7fa7..029af59 100644
--- a/src/lib/data/wasm/records.rs
+++ b/src/lib/data/wasm/records.rs
@@ -7,7 +7,7 @@ use crate::data::wasm::simple::{
 use wasm_bindgen::prelude::*;
 
 #[cfg(feature = "long")]
-use crate::data::records::{LongEncryptedRecord, LongRecord};
+use crate::data::records::{LongEncryptedRecord, LongRecord, LongRecordStructure};
 #[cfg(feature = "long")]
 use crate::data::wasm::long::{
     WASMLongAttribute, WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym, WASMLongPseudonym,
@@ -156,6 +156,45 @@ impl WASMLongRecord {
     pub fn attributes(&self) -> Vec<WASMLongAttribute> {
         self.attributes.clone()
     }
+
+    /// Get the structure of this LongRecord.
+    ///
+    /// # Returns
+    ///
+    /// A LongRecordStructure describing the number of blocks in each pseudonym and attribute
+    #[wasm_bindgen]
+    pub fn structure(&self) -> WASMLongRecordStructure {
+        let rust_record: LongRecord = self.into();
+        WASMLongRecordStructure(rust_record.structure())
+    }
+
+    /// Pads this LongRecord to match a target structure by adding external padding blocks.
+    ///
+    /// This method adds external padding blocks (separate from PKCS#7 padding) to
+    /// each pseudonym and attribute to ensure all records have the same structure.
+    /// This is necessary for batch transcryption where all values must have identical
+    /// structure to prevent linkability attacks.
+    ///
+    /// # Arguments
+    ///
+    /// * `structure` - The target structure specifying the number of blocks for each field
+    ///
+    /// # Returns
+    ///
+    /// A padded LongRecord with padding blocks added where necessary
+    ///
+    /// # Errors
+    ///
+    /// Throws an error if the number of pseudonyms/attributes doesn't match the structure
+    /// or if any field exceeds its target size
+    #[wasm_bindgen(js_name = padTo)]
+    pub fn pad_to(&self, structure: &WASMLongRecordStructure) -> Result<WASMLongRecord, JsValue> {
+        let rust_record: LongRecord = self.into();
+        rust_record
+            .pad_to(&structure.0)
+            .map(WASMLongRecord::from)
+            .map_err(|e| JsValue::from_str(&format!("Padding failed: {e}")))
+    }
 }
 
 #[cfg(feature = "long")]
@@ -168,6 +207,16 @@ impl From<WASMLongRecord> for LongRecord {
     }
 }
 
+#[cfg(feature = "long")]
+impl From<&WASMLongRecord> for LongRecord {
+    fn from(record: &WASMLongRecord) -> Self {
+        LongRecord::new(
+            record.pseudonyms.iter().map(|p| p.0.clone()).collect(),
+            record.attributes.iter().map(|a| a.0.clone()).collect(),
+        )
+    }
+}
+
 #[cfg(feature = "long")]
 impl From<LongRecord> for WASMLongRecord {
     fn from(record: LongRecord) -> Self {
@@ -250,3 +299,38 @@ impl From<WASMLongEncryptedRecord> for LongEncryptedRecord {
         )
     }
 }
+
+#[cfg(feature = "long")]
+/// Structure descriptor for LongRecords - describes the shape including block counts.
+#[wasm_bindgen(js_name = LongRecordStructure)]
+pub struct WASMLongRecordStructure(pub(crate) LongRecordStructure);
+
+#[cfg(feature = "long")]
+#[wasm_bindgen(js_class = LongRecordStructure)]
+impl WASMLongRecordStructure {
+    /// Create a new LongRecordStructure with block counts for pseudonyms and attributes.
+    ///
+    /// # Arguments
+    ///
+    /// * `pseudonymBlocks` - Array of block counts for each pseudonym
+    /// * `attributeBlocks` - Array of block counts for each attribute
+    #[wasm_bindgen(constructor)]
+    pub fn new(pseudonym_blocks: Vec<usize>, attribute_blocks: Vec<usize>) -> Self {
+        WASMLongRecordStructure(LongRecordStructure {
+            pseudonym_blocks,
+            attribute_blocks,
+        })
+    }
+
+    /// Get the block counts for pseudonyms.
+    #[wasm_bindgen(getter, js_name = pseudonymBlocks)]
+    pub fn pseudonym_blocks(&self) -> Vec<usize> {
+        self.0.pseudonym_blocks.clone()
+    }
+
+    /// Get the block counts for attributes.
+    #[wasm_bindgen(getter, js_name = attributeBlocks)]
+    pub fn attribute_blocks(&self) -> Vec<usize> {
+        self.0.attribute_blocks.clone()
+    }
+}
diff --git a/src/lib/transcryptor/batch.rs b/src/lib/transcryptor/batch.rs
index 525a54b..d70ee2e 100644
--- a/src/lib/transcryptor/batch.rs
+++ b/src/lib/transcryptor/batch.rs
@@ -1,5 +1,6 @@
 //! Batch operations for pseudonymization, rekeying, and transcryption with shuffling.
 
+use crate::data::json::{JsonError, UnifyError};
 use crate::data::traits::{HasStructure, Pseudonymizable, Rekeyable, Transcryptable};
 use crate::factors::TranscryptionInfo;
 use rand_core::{CryptoRng, Rng};
@@ -21,6 +22,10 @@ pub enum BatchError {
         expected_structure: String,
         actual_structure: String,
     },
+    #[error(transparent)]
+    UnifyError(#[from] UnifyError),
+    #[error(transparent)]
+    JsonError(#[from] JsonError),
 }
 
 /// Fisher-Yates shuffle using rand_core
diff --git a/src/lib/transcryptor/wasm/distributed.rs b/src/lib/transcryptor/wasm/distributed.rs
index 915601f..d91f5be 100644
--- a/src/lib/transcryptor/wasm/distributed.rs
+++ b/src/lib/transcryptor/wasm/distributed.rs
@@ -1,7 +1,8 @@
 //! WASM bindings for distributed transcryptor.
 
-#[cfg(feature = "long")]
+#[cfg(all(feature = "long", feature = "batch"))]
 use crate::data::long::{LongEncryptedAttribute, LongEncryptedPseudonym};
+#[cfg(feature = "batch")]
 use crate::data::simple::{EncryptedAttribute, EncryptedPseudonym};
 #[cfg(feature = "long")]
 use crate::data::wasm::long::{WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym};
diff --git a/src/lib/transcryptor/wasm/types.rs b/src/lib/transcryptor/wasm/types.rs
index b7b6a53..630e4d7 100644
--- a/src/lib/transcryptor/wasm/types.rs
+++ b/src/lib/transcryptor/wasm/types.rs
@@ -1,7 +1,8 @@
 //! WASM bindings for transcryptor types.
 
-#[cfg(feature = "long")]
+#[cfg(all(feature = "long", feature = "batch"))]
 use crate::data::long::{LongEncryptedAttribute, LongEncryptedPseudonym};
+#[cfg(feature = "batch")]
 use crate::data::simple::{EncryptedAttribute, EncryptedPseudonym};
 #[cfg(feature = "long")]
 use crate::data::wasm::long::{WASMLongEncryptedAttribute, WASMLongEncryptedPseudonym};
diff --git a/tests/python/test_json.py b/tests/python/test_json.py
index 613a7a1..77cdb9e 100644
--- a/tests/python/test_json.py
+++ b/tests/python/test_json.py
@@ -19,6 +19,7 @@
 )
 from libpep.client import (
     encrypt,
+    encrypt_batch,
     decrypt,
 )
 from libpep.data import json as pepjson
@@ -224,6 +225,84 @@ def test_json_batch_transcryption_different_structures(self):
             f"Error should mention structure mismatch, got: {context.exception}",
         )
 
+    def test_json_batch_transcryption_same_structure_different_lengths(self):
+        """
+        Test JSON batch transcryption where structures differ in length.
+        Individually encrypted values cannot be batch-transcrypted, but encrypt_batch
+        should succeed by normalizing/padding the structures.
+        """
+        # Setup keys and secrets
+        global_keys = make_global_keys()
+        pseudo_secret = PseudonymizationSecret(b"pseudo-secret")
+        enc_secret = EncryptionSecret(b"encryption-secret")
+
+        domain_a = PseudonymizationDomain("domain-a")
+        domain_b = PseudonymizationDomain("domain-b")
+        session = EncryptionContext("session-1")
+
+        # Derive session keys from the global secret key
+        session_keys = make_session_keys(global_keys[1], session, enc_secret)
+
+        # Create two JSON values with the same keys but different string lengths
+        data1 = {
+            "patient_id": "p1",
+            "diagnosis": "Flu",
+            "temperature": 38.5
+        }
+
+        data2 = {
+            "patient_id": "patient-002-with-a-very-long-id-that-changes-length",
+            "diagnosis": "Flu with a very long description to ensure structure length differs",
+            "temperature": 38.5
+        }
+
+        # Convert to PEP JSON
+        record1 = PEPJSONBuilder.from_json(data1, ["patient_id"]).build()
+        record2 = PEPJSONBuilder.from_json(data2, ["patient_id"]).build()
+
+        # 1. Encrypt separately
+        encrypted1 = encrypt(record1, session_keys)
+        encrypted2 = encrypt(record2, session_keys)
+
+        # Verify they have different structures due to length
+        self.assertNotEqual(encrypted1.structure(), encrypted2.structure())
+
+        transcryption_info = TranscryptionInfo(
+            domain_a, domain_b, session, session, pseudo_secret, enc_secret
+        )
+
+        # 2. Attempt batch transcryption (should fail because structures are not identical)
+        with self.assertRaises(Exception) as cm:
+            transcrypt_json_batch([encrypted1, encrypted2], transcryption_info)
+
+        self.assertIn("structure", str(cm.exception).lower())
+
+        # 3. Use encrypt_batch (this automatically pads both to the same structure)
+        # encrypt_batch is exposed via libpep.client (see the imports at the top of this file)
+        encrypted_batch = encrypt_batch([record1, record2], session_keys)
+
+        # Verify that the padded structures are now identical
+        self.assertEqual(
+            encrypted_batch[0].structure(),
+            encrypted_batch[1].structure(),
+            "encrypt_batch should have unified the structures via padding"
+        )
+
+        # 4. Batch transcrypt the normalized records (should succeed)
+        transcrypted_batch = transcrypt_json_batch(encrypted_batch, transcryption_info)
+
+        # Verify output
+        self.assertEqual(len(transcrypted_batch), 2)
+
+        # Decrypt and check data integrity (order may be shuffled by batch transcryption)
+        decrypted_jsons = [
+            decrypt(v, session_keys).to_json() for v in transcrypted_batch
+        ]
+        diagnoses = {d["diagnosis"] for d in decrypted_jsons}
+
+        self.assertIn("Flu", diagnoses)
+        self.assertIn("Flu with a very long description to ensure structure length differs", diagnoses)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/wasm/json.test.js b/tests/wasm/json.test.js
index 59da773..ac23f74 100644
--- a/tests/wasm/json.test.js
+++ b/tests/wasm/json.test.js
@@ -9,7 +9,7 @@ const {
   PseudonymizationSecret,
   EncryptionSecret,
   PseudonymizationDomain,
-  EncryptionContext,
+  EncryptionContext, encryptJsonBatch,
 } = require("../../pkg/libpep.js");
 
 test('test json transcryption with builder', async () => {
@@ -203,3 +203,65 @@ test('test json batch transcryption different structures', async () => {
     transcryptJsonBatch([encrypted1, encrypted2], transcryptionInfo);
   }).toThrow(/Inconsistent structure in batch/);
 });
+
+
+test('test json batch transcryption same structure different lengths', async () => {
+  // Setup keys and secrets
+  const globalKeys = makeGlobalKeys();
+  const pseudoSecret = new PseudonymizationSecret(Uint8Array.from(Buffer.from("pseudo-secret")));
+  const encSecret = new EncryptionSecret(Uint8Array.from(Buffer.from("encryption-secret")));
+
+  const domainA = new PseudonymizationDomain("domain-a");
+  const domainB = new PseudonymizationDomain("domain-b");
+  const session = new EncryptionContext("session-1");
+
+  const sessionKeys = makeSessionKeys(globalKeys.secret, session, encSecret);
+
+  // Create two JSON values whose differing value lengths give them different structures
+  const data1 = {
+    patient_id: "patient-001",
+    diagnosis: "Flu",
+    temperature: 38.5
+  };
+
+  const data2 = {
+    patient_id: "patient-002 with a very long ID that makes the structure different",
+    diagnosis: "Flu but with very long description that makes the structure different",
+    temperature: 38.5
+  };
+
+  // Convert to PEP JSON with patient_id as the pseudonym field
+  const record1 = PEPJSONBuilder.fromJson(data1, ["patient_id"]).build();
+  const record2 = PEPJSONBuilder.fromJson(data2, ["patient_id"]).build();
+
+  // Encrypt both records
+  const encrypted1 = encryptJson(record1, sessionKeys);
+  const encrypted2 = encryptJson(record2, sessionKeys);
+
+  // Verify they have different structures
+  const structure1 = encrypted1.structure();
+  const structure2 = encrypted2.structure();
+  expect(structure1.equals(structure2)).toBe(false);
+
+  // Attempt batch transcryption (this should throw an error because structures don't match)
+  const transcryptionInfo = new TranscryptionInfo(
+    domainA,
+    domainB,
+    session,
+    session,
+    pseudoSecret,
+    encSecret
+  );
+
+  // Verify we get an error about structure mismatch
+  expect(() => {
+    transcryptJsonBatch([encrypted1, encrypted2], transcryptionInfo);
+  }).toThrow(/Inconsistent structure in batch/);
+
+  // We can encrypt them in a batch, which automatically adds padding to make the structures consistent
+  const encryptedBatch = encryptJsonBatch([record1, record2], sessionKeys);
+  const transcryptedBatch = transcryptJsonBatch(encryptedBatch, transcryptionInfo);
+
+  // Verify we got 2 records back
+  expect(transcryptedBatch.length).toBe(2);
+});
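
For reference, a minimal usage sketch (not part of the patch) of the batch JSON API introduced above, written against the exports of pkg/libpep.js that the tests use. It assumes a WASM build with the json and batch features enabled; the field names and values are illustrative only. The flow mirrors the new test: build records whose value lengths differ, let encryptJsonBatch unify and pad their structures, then batch-transcrypt and batch-decrypt.

const {
  PEPJSONBuilder, encryptJsonBatch, transcryptJsonBatch, decryptJsonBatch,
  makeGlobalKeys, makeSessionKeys, TranscryptionInfo,
  PseudonymizationSecret, EncryptionSecret, PseudonymizationDomain, EncryptionContext,
} = require("../../pkg/libpep.js");

// Session setup, as in the tests above.
const globalKeys = makeGlobalKeys();
const encSecret = new EncryptionSecret(Uint8Array.from(Buffer.from("encryption-secret")));
const pseudoSecret = new PseudonymizationSecret(Uint8Array.from(Buffer.from("pseudo-secret")));
const session = new EncryptionContext("session-1");
const sessionKeys = makeSessionKeys(globalKeys.secret, session, encSecret);

// Two records with the same fields but different value lengths,
// so their block structures differ.
const records = [
  PEPJSONBuilder.fromJson({ patient_id: "p1", note: "short" }, ["patient_id"]).build(),
  PEPJSONBuilder.fromJson({ patient_id: "patient-002-long-id", note: "a much longer note" }, ["patient_id"]).build(),
];

// encryptJsonBatch unifies the structures (padding the shorter fields),
// so the ciphertexts can be transcrypted as one batch without a structure mismatch.
const encrypted = encryptJsonBatch(records, sessionKeys);

const info = new TranscryptionInfo(
  new PseudonymizationDomain("domain-a"),
  new PseudonymizationDomain("domain-b"),
  session,
  session,
  pseudoSecret,
  encSecret
);

// Batch transcryption shuffles the batch, so output order is not input order.
const transcrypted = transcryptJsonBatch(encrypted, info);

// decryptJsonBatch returns plain PEPJSONValues again (same encryption context here).
const decrypted = decryptJsonBatch(transcrypted, sessionKeys);
console.log(decrypted.length); // 2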