diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 287225589..5dd980f0f 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,14 +33,14 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "d5085fda8"; + public static final String gitCommitId = "723dfee80"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitDate = "2026-04-29"; + public static final String gitCommitDate = "2026-04-30"; /** * Build timestamp in Perl 5 "Compiled at" format (e.g., "Apr 7 2026 11:20:00"). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 30 2026 08:15:13"; + public static final String buildTimestamp = "Apr 30 2026 10:12:03"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/operators/pack/PackGroupHandler.java b/src/main/java/org/perlonjava/runtime/operators/pack/PackGroupHandler.java index fe8d91f43..a85e18f9d 100644 --- a/src/main/java/org/perlonjava/runtime/operators/pack/PackGroupHandler.java +++ b/src/main/java/org/perlonjava/runtime/operators/pack/PackGroupHandler.java @@ -340,10 +340,14 @@ public static GroupResult handleSlashConstruct(String template, int position, in if (stringCount >= 0) { effectiveCount = stringCount; } else { - byte[] strBytes = byteMode - ? 
str.getBytes(StandardCharsets.ISO_8859_1) - : str.getBytes(StandardCharsets.UTF_8); - effectiveCount = strBytes.length; + // Match the byte/char count used by PackWriter.writeString: + // - byte mode (U0): raw bytes via ISO-8859-1 + // - normal mode: one unit per Java character (writeString writes + // one byte per ISO-8859-1 char, or one codepoint for high Unicode) + int unitCount = byteMode + ? str.getBytes(StandardCharsets.ISO_8859_1).length + : str.length(); + effectiveCount = unitCount; if (stringFormat == 'Z') { effectiveCount++; // Include null terminator in count } diff --git a/src/test/resources/unit/pack/slash_string.t b/src/test/resources/unit/pack/slash_string.t new file mode 100644 index 000000000..bdf2a48c5 --- /dev/null +++ b/src/test/resources/unit/pack/slash_string.t @@ -0,0 +1,59 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Test::More tests => 7; + +# Tests for the "len/Z*" / "len/a*" / "len/A*" pack constructs with byte +# strings (Latin-1 high bytes). Regression for a bug where the slash +# construct used getBytes(UTF-8) to compute the length prefix while +# writeString() emitted ISO-8859-1 bytes, producing a wrong (doubled) +# length and trailing zero padding. This is what BSON::PP triggers via +# pack("V/Z*", $utf8_encoded_string). 
+ +my $latin = "\xc3\xa9\xc3\xa9\xc3\xa9"; # 6 bytes, no utf8 flag + +is( + unpack("H*", pack("V/Z*", $latin)), + "07000000c3a9c3a9c3a900", + 'V/Z* length prefix counts bytes (not re-encoded UTF-8) for Z*', +); + +is( + unpack("H*", pack("V/a*", $latin)), + "06000000c3a9c3a9c3a9", + 'V/a* length prefix counts bytes for a*', +); + +is( + unpack("H*", pack("V/A*", $latin)), + "06000000c3a9c3a9c3a9", + 'V/A* length prefix counts bytes for A*', +); + +is( + unpack("H*", pack("n/Z*", $latin)), + "0007c3a9c3a9c3a900", + 'n/Z* length prefix counts bytes for Z*', +); + +# Round-trip through unpack +{ + my $p = pack("V/a*", $latin); + my ($got) = unpack("V/a*", $p); + is($got, $latin, 'V/a* round-trips a Latin-1 byte string'); +} + +# Mirror BSON::PP's exact use: a 0x02 (string) field in a tiny BSON doc. +# Field value is "éééééé" already utf8-encoded to 12 bytes. The BSON +# string framing should report length=13 (12 bytes + NUL) and emit +# exactly 13 bytes of payload. +{ + my $v = "\xc3\xa9" x 6; # 12 bytes, utf8 flag off + my $p = pack("V/Z*", $v); + is(length($p), 4 + 13, 'V/Z* total length is 4 (len prefix) + bytes + NUL'); + is( + unpack("H*", $p), + "0d000000" . ("c3a9" x 6) . "00", + 'V/Z* matches BSON wire format', + ); +}