From 2c3f3e3e7ed4f84db4ebe909d1fc0406544fd093 Mon Sep 17 00:00:00 2001 From: Jorge Solorzano Date: Wed, 6 May 2026 13:56:52 +0200 Subject: [PATCH 1/5] chore(docs): lint README.md --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 20c5a6d..2b629a4 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,10 @@ Import Maven dependency: The normal usage is to import the dependency of the Stringprep profile to use, and lookup the provider service that contains the profile. -### Example: +### Example + Import the `SASLprep` dependency, this transitively imports the `Stringprep` dependency. + ```xml com.ongres.stringprep @@ -63,6 +65,7 @@ Import the `SASLprep` dependency, this transitively imports the `Stringprep` dep ``` Get the `SASLprep` provider service: + ```java Profile saslPrep = Stringprep.getProvider("SASLprep"); String prepared = saslPrep.prepareStored("I\u00ADX \u2168"); @@ -72,6 +75,7 @@ prepared.equals("IX IX"); // true You could also (only) use the stringprep dependency to create your own profiles by implementing the `Profile` interface, just override the `profile()` method with the set of options. Anonymous on-the-fly profile usage: + ```java Profile saslPrep = () -> EnumSet.of(Option.NORMALIZE_KC, Option.MAP_TO_NOTHING); String prepared = saslPrep.prepareStored("I\u00ADX ⑳"); @@ -80,7 +84,8 @@ prepared.equals("IX 20"); // true > Please note that when two protocols that use different profiles of stringprep interoperate, there may be conflict about what characters are and are not allowed in the final string. Thus, protocol developers should strongly consider re-using existing profiles of stringprep. 
-### Java Modules (JPMS): +### Java Modules (JPMS) + The Stringprep and profiles implementation are explicit Java modules with the names: * `com.ongres.stringprep` @@ -90,6 +95,7 @@ The Stringprep and profiles implementation are explicit Java modules with the na If you depend on a specific profile (`saslprep` or `nameprep`) there is an implied readability on `stringprep`, so you will only need to declare in your `module-info.java` the profile module and get the service from the provider. Example `module-info.java`: + ```java module test.app { requires com.ongres.saslprep; From 682a91395cca40999d63a728559836abf03fc261 Mon Sep 17 00:00:00 2001 From: Jorge Solorzano Date: Wed, 6 May 2026 14:07:49 +0200 Subject: [PATCH 2/5] fix: ArrayIndexOutOfBoundsException on empty string after mapping If the mapping phase (Step 1) or Normalization phase (Step 2) resulted in an empty string (e.g., a string consisting entirely of characters mapped to "nothing"), the code would throw an ArrayIndexOutOfBoundsException. Now there is a guard clause to return early if the value length is zero. 
--- .../test/java/test/saslprep/SaslPrepTest.java | 26 +++++++++++++++++++ .../com/ongres/stringprep/Stringprep.java | 12 +++++---- .../java/test/stringprep/ProfileTest.java | 17 ++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/saslprep/src/test/java/test/saslprep/SaslPrepTest.java b/saslprep/src/test/java/test/saslprep/SaslPrepTest.java index 51d5dae..5758df0 100644 --- a/saslprep/src/test/java/test/saslprep/SaslPrepTest.java +++ b/saslprep/src/test/java/test/saslprep/SaslPrepTest.java @@ -8,16 +8,22 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.util.EnumSet; +import java.util.Locale; +import java.util.stream.Collectors; import com.ongres.saslprep.SASLprep; import com.ongres.stringprep.Option; import com.ongres.stringprep.Profile; import com.ongres.stringprep.Stringprep; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EmptySource; +import org.junit.jupiter.params.provider.ValueSource; class SaslPrepTest { @@ -159,4 +165,24 @@ void unassigned() { assertEquals("Unassigned code point \"0x0588\"", e.getMessage()); } } + + @ParameterizedTest + @ValueSource(strings = { "\u200B\u200C\u200D\u034F", "\uFEFF" }) + @EmptySource + void testEmptyMap(String string) { + String stored = saslPrep.prepareStored(string); + assertTrue(stored.isEmpty(), () -> stored.codePoints() + .mapToObj(cp -> String.format(Locale.ROOT, "0x%04X", cp)) + .collect(Collectors.joining(", "))); + } + + @Test + void testAdditionalMappingOptions() { + String stored = saslPrep.prepareStored("\uFEFF\u2000\u3000\u00A0\uFEFF"); + assertEquals(" ", stored, + stored.codePoints() + .mapToObj(cp -> 
String.format(Locale.ROOT, "0x%04X", cp)) + .collect(Collectors.joining(", "))); + } + } diff --git a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java index 7bebca8..28b896b 100644 --- a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java +++ b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java @@ -130,11 +130,9 @@ char[] prepare(final char[] string) { return string; } - char[] value = string.clone(); - // 1) Map -- For each character in the input, check if it has a mapping // and, if so, replace it with its mapping. - value = map(value); + char[] value = map(string); // 2) Normalize -- Possibly normalize the result of step 1 using Unicode // normalization. @@ -142,9 +140,13 @@ char[] prepare(final char[] string) { value = Normalizer.normalize(CharBuffer.wrap(value), Normalizer.Form.NFKC).toCharArray(); } + // The mapping/normalization removed all chars, return the empty string. + if (value.length == 0) { + return value; + } + boolean firstRandAlCat = Tables.bidirectionalPropertyRorAL(Character.codePointAt(value, 0)); - boolean lastRandAlCat = - Tables.bidirectionalPropertyRorAL(Character.codePointAt(value, value.length - 1)); + boolean lastRandAlCat = Tables.bidirectionalPropertyRorAL(Character.codePointBefore(value, value.length)); boolean containsRandAlCat = false; boolean containsLcat = false; int codePoint; diff --git a/stringprep/src/test/java/test/stringprep/ProfileTest.java b/stringprep/src/test/java/test/stringprep/ProfileTest.java index 4c58e76..c84699d 100644 --- a/stringprep/src/test/java/test/stringprep/ProfileTest.java +++ b/stringprep/src/test/java/test/stringprep/ProfileTest.java @@ -9,10 +9,12 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import 
java.util.EnumSet; import java.util.Locale; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.IntStream; import com.ongres.stringprep.Option; @@ -22,6 +24,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.EmptySource; import org.junit.jupiter.params.provider.ValueSource; class ProfileTest { @@ -236,4 +239,18 @@ void testProhibitionChangeDisplayProperties(int cp) { () -> "Character: " + chars + ", CodePoint: " + cp); } + @ParameterizedTest + @ValueSource(strings = { "\u200B\u200C\u200D\u034F", "\uFEFF" }) + @EmptySource + void testEmptyMap(String string) { + Profile profile = () -> EnumSet.of( + Option.MAP_TO_NOTHING, + Option.NORMALIZE_KC, + Option.CHECK_BIDI); + String stored = profile.prepareStored(string); + assertTrue(stored.isEmpty(), () -> stored.codePoints() + .mapToObj(cp -> String.format(Locale.ROOT, "0x%04X", cp)) + .collect(Collectors.joining(", "))); + } + } From 9ceafa7fa2dc46c9a6966e792c2ad0068c81a9a6 Mon Sep 17 00:00:00 2001 From: Jorge Solorzano Date: Wed, 6 May 2026 14:34:17 +0200 Subject: [PATCH 3/5] fix: add Normalizer.isNormalized guard to avoid allocations Improve throughput and reduce heap churn, use a "fast-path" check using Normalizer.isNormalized. Normalizer.normalize() always allocates a new buffer/string; this is avoided if the string is already normalized. 
--- .../src/main/java/com/ongres/stringprep/Stringprep.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java index 28b896b..d556d2d 100644 --- a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java +++ b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java @@ -7,6 +7,7 @@ import java.nio.CharBuffer; import java.text.Normalizer; +import java.util.Arrays; import java.util.EnumSet; import java.util.Locale; import java.util.Objects; @@ -137,7 +138,12 @@ char[] prepare(final char[] string) { // 2) Normalize -- Possibly normalize the result of step 1 using Unicode // normalization. if (normalizeKc) { - value = Normalizer.normalize(CharBuffer.wrap(value), Normalizer.Form.NFKC).toCharArray(); + CharBuffer wrap = CharBuffer.wrap(value); + if (!Normalizer.isNormalized(wrap, Normalizer.Form.NFKC)) { + char[] normalized = Normalizer.normalize(wrap, Normalizer.Form.NFKC).toCharArray(); + Arrays.fill(value, '\0'); + value = normalized; + } } // The mapping/normalization removed all chars, return the empty string. From 10d4e8e50e424560f3eddb692725f54a9b6422d9 Mon Sep 17 00:00:00 2001 From: Jorge Solorzano Date: Fri, 8 May 2026 10:17:49 +0200 Subject: [PATCH 4/5] feat: add SecureStringBuilder for memory-safe string building Introduced SecureStringBuilder to handle sensitive data during stringprep operations. Unlike standard StringBuilder, this implementation ensures internal buffers are zeroed out on resize and closure to mitigate memory exposure of cryptographic material. 
--- .../stringprep/SecureStringBuilder.java | 114 ++++++++++++++++++ .../com/ongres/stringprep/Stringprep.java | 58 +++++---- 2 files changed, 142 insertions(+), 30 deletions(-) create mode 100644 stringprep/src/main/java/com/ongres/stringprep/SecureStringBuilder.java diff --git a/stringprep/src/main/java/com/ongres/stringprep/SecureStringBuilder.java b/stringprep/src/main/java/com/ongres/stringprep/SecureStringBuilder.java new file mode 100644 index 0000000..c7b4396 --- /dev/null +++ b/stringprep/src/main/java/com/ongres/stringprep/SecureStringBuilder.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2026 OnGres, Inc. + * SPDX-License-Identifier: BSD-2-Clause + */ + +package com.ongres.stringprep; + +import java.util.Arrays; + +/** + * A memory-safe string builder alternative designed specifically for cryptographic operations. + * + *
<p>
Standard {@link String} and {@link StringBuilder} classes leave sensitive data (like passwords + * or encryption keys) in memory until garbage collection occurs. This class mitigates that risk + * by ensuring that internal character arrays are explicitly zeroed out when the buffer is resized, + * and when the resource is closed. + * + * @see AutoCloseable + */ +final class SecureStringBuilder implements AutoCloseable { + private char[] buffer; + private int length; + + /** + * Constructs a new {@code SecureStringBuilder} with the specified initial capacity. + * + * @param initialCapacity the initial capacity of the secure buffer. + * @throws IllegalArgumentException if {@code initialCapacity} is negative. + */ + SecureStringBuilder(int initialCapacity) { + if (initialCapacity < 0) { + throw new IllegalArgumentException("Initial capacity cannot be negative"); + } + this.buffer = new char[initialCapacity]; + this.length = 0; + } + + /** + * Ensures that the internal buffer has enough capacity to hold the specified minimum + * number of characters. If a resize is required, the old array is securely wiped + * before being discarded. + * + * @param minCapacity the desired minimum capacity. + * @throws OutOfMemoryError if the required size exceeds JVM array limits. + */ + private void ensureCapacity(int minCapacity) { + if (minCapacity > buffer.length) { + // Use long to prevent integer overflow when doubling + int newCapacity = (int) Math.min(Integer.MAX_VALUE, Math.max(buffer.length * 2L, minCapacity)); + char[] newBuffer = new char[newCapacity]; + System.arraycopy(buffer, 0, newBuffer, 0, length); + + // SECURE WIPE: Zero out the old array before abandoning it to the GC + Arrays.fill(buffer, '\0'); + buffer = newBuffer; + } + } + + /** + * Appends a single Unicode code point to this buffer. + * + *
<p>
This method correctly handles supplementary characters by converting them + * into their corresponding UTF-16 surrogate pairs if necessary. + * + * @param codePoint the Unicode code point to append. + * @throws IllegalArgumentException if the specified code point is not a valid Unicode code point. + * @throws IllegalStateException if the builder has been closed. + */ + void appendCodePoint(int codePoint) { + if (buffer == null) { + throw new IllegalStateException("SecureStringBuilder is closed"); + } + int charCount = Character.charCount(codePoint); + ensureCapacity(this.length + charCount); + Character.toChars(codePoint, this.buffer, this.length); + this.length += charCount; + } + + /** + * Extracts a copy of the current buffer sized exactly to the appended content. + * + *
<p>
Security Warning: This method allocates a new array containing the + * sensitive data. The internal buffer remains intact until {@link #close()} is called. + * The caller assumes full responsibility for securely wiping the returned array + * (e.g., using {@link Arrays#fill(char[], char)}) as soon as it is no longer needed. + * + * @return a new, exact-sized character array containing the buffer's contents. + * @throws IllegalStateException if the builder has been closed. + */ + char[] toCharArray() { + if (buffer == null) { + throw new IllegalStateException("SecureStringBuilder is closed"); + } + char[] result = new char[length]; + System.arraycopy(buffer, 0, result, 0, length); + return result; + } + + /** + * Securely wipes the internal buffer by overwriting all contents with null characters + * ('\0') and resets the length to zero. + * + *
<p>
This method should be called inside a {@code finally} block or implicitly via + * a {@code try-with-resources} statement to guarantee cleanup. + */ + @Override + public void close() { + if (buffer != null) { + Arrays.fill(buffer, '\0'); + buffer = null; //NOPMD + } + length = 0; + } +} diff --git a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java index d556d2d..29cc9e4 100644 --- a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java +++ b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java @@ -198,40 +198,38 @@ char[] prepare(final char[] string) { } private char[] map(char[] string) { - final StringBuilder mapping = new StringBuilder(string.length); - for (int codePoint, i = 0; i < string.length; i += Character.charCount(codePoint)) { - codePoint = Character.codePointAt(string, i); + try (SecureStringBuilder mapping = new SecureStringBuilder(string.length + 16)) { + for (int codePoint, i = 0; i < string.length; i += Character.charCount(codePoint)) { + codePoint = Character.codePointAt(string, i); - if (mapToNothing && Tables.mapToNothing(codePoint)) { // NOPMD - // The following characters are simply deleted from the input (that is, - // they are mapped to nothing) because their presence or absence in - // protocol identifiers should not make two strings different. They are - // listed in Table B.1. - } else if (normalizeKc && caseFoldNfkc) { - // appendix B.2 is for profiles that also use Unicode - // normalization form KC - for (int cp : Tables.mapWithNfkc(codePoint)) { - mapping.appendCodePoint(cp); + if (mapToNothing && Tables.mapToNothing(codePoint)) { // NOPMD + // The following characters are simply deleted from the input (that is, + // they are mapped to nothing) because their presence or absence in + // protocol identifiers should not make two strings different. They are + // listed in Table B.1. 
+ } else if (normalizeKc && caseFoldNfkc) { + // appendix B.2 is for profiles that also use Unicode + // normalization form KC + for (int cp : Tables.mapWithNfkc(codePoint)) { + mapping.appendCodePoint(cp); + } + } else if (!normalizeKc && caseFoldNoNormalization) { + // while appendix B.3 is for profiles that do + // not use Unicode normalization + for (int cp : Tables.mapWithoutNormalization(codePoint)) { + mapping.appendCodePoint(cp); + } + } else if (additionalMapping) { + // - Any additional mapping tables specific to the profile + for (int cp : profile.additionalMappingTable(codePoint)) { + mapping.appendCodePoint(cp); + } + } else { + mapping.appendCodePoint(codePoint); } - } else if (!normalizeKc && caseFoldNoNormalization) { - // while appendix B.3 is for profiles that do - // not use Unicode normalization - for (int cp : Tables.mapWithoutNormalization(codePoint)) { - mapping.appendCodePoint(cp); - } - } else if (additionalMapping) { - // - Any additional mapping tables specific to the profile - for (int cp : profile.additionalMappingTable(codePoint)) { - mapping.appendCodePoint(cp); - } - } else { - mapping.appendCodePoint(codePoint); } + return mapping.toCharArray(); } - - char[] arr = new char[mapping.length()]; - mapping.getChars(0, mapping.length(), arr, 0); - return arr; } private void prohibitedOutput(int codePoint) { From 7c13108c29b9dc466462d0795c7dc161f9502bfd Mon Sep 17 00:00:00 2001 From: Jorge Solorzano Date: Fri, 8 May 2026 11:07:53 +0200 Subject: [PATCH 5/5] chore: replace individual boolean flags with EnumSet from profile Refactored Stringprep to store the EnumSet directly instead of unpacking it into 18 individual boolean flags. This eliminates constructor boilerplate, reduces class bloat, and simplifies the internal state. 
--- .../com/ongres/stringprep/Stringprep.java | 117 ++++++------------ 1 file changed, 40 insertions(+), 77 deletions(-) diff --git a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java index 29cc9e4..b2e53b2 100644 --- a/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java +++ b/stringprep/src/main/java/com/ongres/stringprep/Stringprep.java @@ -23,24 +23,7 @@ public final class Stringprep { private final Profile profile; - private final boolean mapToNothing; - private final boolean additionalMapping; - private final boolean caseFoldNfkc; - private final boolean caseFoldNoNormalization; - private final boolean normalizeKc; - private final boolean checkBidi; - private final boolean forbidAdditionalCharacters; - private final boolean forbidAsciiSpaces; - private final boolean forbidNonAsciiSpaces; - private final boolean forbidAsciiControl; - private final boolean forbidNonAsciiControl; - private final boolean forbidPrivateUse; - private final boolean forbidNonCharacter; - private final boolean forbidSurrogate; - private final boolean forbidInappropriatePlainText; - private final boolean forbidInappropriateCanonRep; - private final boolean forbidChangeDisplayDeprecated; - private final boolean forbidTagging; + private final Set