From 31f5e395369639c4ad22ec79e29de0c65456aa98 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 13 Mar 2026 15:34:21 +0100 Subject: [PATCH 1/2] Perf: Remove unnecessary fsync from file I/O (6x speedup) JFR profiling revealed that 94% of native method time in I/O-heavy workloads (like Image::ExifTool) was spent in FileChannel.force() calls - synchronous disk flushes (fsync). The previous implementation incorrectly called force(true) on every flush() and close(), which is extremely slow (10-100ms per call). Changes: - CustomFileChannel.close(): Remove force() - OS flushes on close - CustomFileChannel.flush(): Now no-op (FileChannel is unbuffered) - CustomFileChannel.sync(): NEW - explicit fsync when truly needed - CustomFileChannel.fileno(): Return synthetic fd (was undef) - IOHandle interface: Add sync() with documentation on flush vs sync - LayeredIOHandle: Delegate sync() to underlying handle - IOHandle perlmodule: Wire up _sync() to call ioHandle.sync() - GlobalContext: Initialize IOHandle methods at startup - IO/Handle.pm: Fix Java backend detection Perl semantics preserved: - flush() flushes app buffers to OS kernel buffer - sync() calls fsync to write to physical disk (IO::Handle->sync) Performance: - ExifTool.t: 16.7s -> 2.9s (5.8x faster) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/io/CustomFileChannel.java | 43 +++++++-- .../org/perlonjava/runtime/io/IOHandle.java | 91 +++++++++++++++++++ .../runtime/io/LayeredIOHandle.java | 14 +++ .../runtime/perlmodule/IOHandle.java | 36 +++++--- .../runtime/runtimetypes/GlobalContext.java | 1 + src/main/perl/lib/IO/Handle.pm | 10 +- 6 files changed, 164 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java index 8f3b183fb..bc39a3105 100644 --- a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java @@ -195,13 +195,15 @@ public RuntimeScalar write(String string) { /** * Closes the file channel and releases any system resources. * + *

Note: We intentionally do NOT call force() here. The OS will flush + * buffers on close, and force() (fsync) is extremely slow. If explicit + * sync-to-disk is needed, use {@link #sync()} before closing. + * * @return RuntimeScalar with true value on success */ @Override public RuntimeScalar close() { try { - // Ensure all data is flushed before closing - fileChannel.force(true); // Force both content and metadata fileChannel.close(); return scalarTrue; } catch (IOException e) { @@ -292,33 +294,54 @@ public RuntimeScalar seek(long pos, int whence) { /** * Flushes any buffered data to the underlying storage device. * - *

This method forces any buffered data to be written to the storage device, - * including file metadata for reliability. + *

For FileChannel, writes go directly to the OS buffer (no Java-side buffering), + * so this is effectively a no-op. We intentionally do NOT call force() here + * because fsync is extremely slow. Use {@link #sync()} for explicit disk sync. * * @return RuntimeScalar with true on success */ @Override public RuntimeScalar flush() { + // FileChannel has no Java-side buffer to flush. + // We don't call force() here because it's extremely slow (fsync). + // Use sync() if explicit disk synchronization is needed. + return scalarTrue; + } + + /** + * Synchronizes file data to the underlying storage device (fsync). + * + *

This method forces all buffered data and metadata to be written to + * the physical storage device. This is slow but guarantees data durability. + * Use this only when you need to ensure data survives a system crash. + * + * @return RuntimeScalar with true on success + */ + public RuntimeScalar sync() { try { - // Force both content and metadata to be written for reliability fileChannel.force(true); return scalarTrue; } catch (IOException e) { - return handleIOException(e, "flush failed"); + return handleIOException(e, "sync failed"); } } /** * Gets the file descriptor number for this channel. * - *

Note: FileChannel does not expose the underlying file descriptor in Java, - * so this method returns undef. This is a limitation of the Java API. + *

Java's FileChannel does not expose the underlying OS file descriptor. + * We return a synthetic file descriptor based on the object's identity hash, + * starting from 3 (to avoid collision with stdin=0, stdout=1, stderr=2). + * This allows Perl code that checks {@code defined fileno($fh)} to work correctly. * - * @return RuntimeScalar with undef value + * @return RuntimeScalar with a synthetic file descriptor number */ @Override public RuntimeScalar fileno() { - return RuntimeScalarCache.scalarUndef; // FileChannel does not expose a file descriptor + // Return a synthetic fd based on object identity, starting from 3 + // (0=stdin, 1=stdout, 2=stderr are reserved) + int syntheticFd = 3 + (System.identityHashCode(this) & 0x7FFFFFFF) % 1000000; + return new RuntimeScalar(syntheticFd); } /** diff --git a/src/main/java/org/perlonjava/runtime/io/IOHandle.java b/src/main/java/org/perlonjava/runtime/io/IOHandle.java index b2d21f8ee..6926d1525 100644 --- a/src/main/java/org/perlonjava/runtime/io/IOHandle.java +++ b/src/main/java/org/perlonjava/runtime/io/IOHandle.java @@ -2,12 +2,46 @@ import org.perlonjava.runtime.runtimetypes.RuntimeIO; import org.perlonjava.runtime.runtimetypes.RuntimeScalar; +import org.perlonjava.runtime.runtimetypes.RuntimeScalarCache; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; import java.util.Deque; +/** + * Base interface for all I/O handle implementations in PerlOnJava. + * + *

Flush vs Sync Semantics

+ * + *

This interface distinguishes between two related but distinct operations:

+ * + *

flush() - Buffer Flush

+ *

Flushes any application-level buffers to the operating system's kernel buffer. + * This is equivalent to Perl's {@code $fh->flush()} or C's {@code fflush()}. + * After flush(), data is visible to other processes but may not yet be on physical disk.

+ * + *

For unbuffered I/O (like Java NIO FileChannel), flush() is a no-op since writes + * go directly to the kernel buffer.

+ * + *

sync() - Disk Synchronization

+ *

Forces data to be written to the physical storage device (fsync). + * This is equivalent to Perl's {@code IO::Handle->sync()} or POSIX {@code fsync()}. + * This operation is slow but guarantees data durability in case of system crash.

+ * + *

Performance Note

+ *

fsync() is extremely slow (can take 10-100ms per call). The previous implementation + * incorrectly called fsync() on every flush() and close(), causing severe performance + * issues for I/O-heavy workloads like Image::ExifTool (94% of time spent in fsync).

+ * + *

Use sync() only when you need guaranteed durability (e.g., database commits, + * critical configuration saves). For normal file operations, flush() or just close() + * is sufficient - the OS will eventually write data to disk.

+ * + * @see CustomFileChannel + * @see StandardIO + * @see LayeredIOHandle + */ public interface IOHandle { int SEEK_SET = 0; // Seek from beginning of file @@ -17,12 +51,69 @@ public interface IOHandle { // Buffer for pushed-back byte values ThreadLocal> ungetBuffer = ThreadLocal.withInitial(ArrayDeque::new); + /** + * Writes data to this I/O handle. + * + * @param string the data to write + * @return RuntimeScalar with true on success, false on failure + */ RuntimeScalar write(String string); + /** + * Closes this I/O handle and releases system resources. + * + *

Note: This does NOT call sync()/fsync. The OS will flush kernel buffers + * on close. If you need guaranteed disk durability, call sync() before close().

+ * + * @return RuntimeScalar with true on success + */ RuntimeScalar close(); + /** + * Flushes application-level buffers to the operating system. + * + *

This is equivalent to Perl's {@code $fh->flush()} or C's {@code fflush()}. + * After this call, data is in the OS kernel buffer and visible to other processes, + * but may not yet be on physical disk.

+ * + *

For unbuffered I/O (like Java NIO FileChannel), this is a no-op since + * writes go directly to the kernel buffer.

+ * + *

Note: This does NOT call fsync(). Use {@link #sync()} if you need + * guaranteed disk durability.

+ * + * @return RuntimeScalar with true on success + * @see #sync() + */ RuntimeScalar flush(); + /** + * Synchronizes data to physical storage (fsync). + * + *

This is equivalent to Perl's {@code IO::Handle->sync()} or POSIX {@code fsync()}. + * This forces all buffered data and metadata to be written to the physical storage + * device, guaranteeing durability in case of system crash.

+ * + *

Warning: This operation is extremely slow (10-100ms typical). + * Only use when you truly need guaranteed durability, such as:

+ *
    + *
  • Database transaction commits
  • + *
  • Critical configuration file saves
  • + *
  • Financial transaction logs
  • + *
+ * + *

For normal file operations, just close() the file - the OS will + * eventually write data to disk.

+ * + * @return RuntimeScalar with true on success + * @see #flush() + */ + default RuntimeScalar sync() { + // Default implementation: no-op for handles that don't support sync + // (e.g., sockets, pipes, stdin/stdout) + return RuntimeScalarCache.scalarTrue; + } + // Default ungetc implementation - takes a byte value (0-255) default RuntimeScalar ungetc(int byteValue) { if (byteValue == -1) { diff --git a/src/main/java/org/perlonjava/runtime/io/LayeredIOHandle.java b/src/main/java/org/perlonjava/runtime/io/LayeredIOHandle.java index 148734095..fbff54a03 100644 --- a/src/main/java/org/perlonjava/runtime/io/LayeredIOHandle.java +++ b/src/main/java/org/perlonjava/runtime/io/LayeredIOHandle.java @@ -380,6 +380,20 @@ public RuntimeScalar flush() { return delegate.flush(); } + /** + * Synchronizes data to physical storage (fsync). + * + *

This method passes through to the delegate's sync method. + * Use this only when you need guaranteed disk durability.

+ * + * @return the result from the delegate's sync operation + * @see IOHandle#sync() + */ + @Override + public RuntimeScalar sync() { + return delegate.sync(); + } + /** * Closes the handle and cleans up all layers. * diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/IOHandle.java b/src/main/java/org/perlonjava/runtime/perlmodule/IOHandle.java index b0dd5281d..2ddebdb2f 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/IOHandle.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/IOHandle.java @@ -85,7 +85,16 @@ public static RuntimeList _clearerr(RuntimeArray args, int ctx) { } /** - * Sync file's in-memory state with physical medium + * Sync file's in-memory state with physical medium (fsync). + * + *

This synchronizes a file's in-memory state with that on the physical medium. + * It operates at the file descriptor level (like sysread, sysseek), not at the + * perlio API level. Data buffered at the perlio level must be flushed first + * with flush().

+ * + *

Returns "0 but true" on success, undef on error or invalid handle.

+ * + * @see IO::Handle->sync */ public static RuntimeList _sync(RuntimeArray args, int ctx) { if (args.size() != 1) { @@ -94,26 +103,25 @@ public static RuntimeList _sync(RuntimeArray args, int ctx) { RuntimeIO fh = RuntimeIO.getRuntimeIO(args.get(0)); if (fh == null || fh.ioHandle == null) { - return new RuntimeList(); + return new RuntimeList(); // undef for invalid handle } try { - // First flush any buffered data + // First flush any perlio-level buffered data fh.flush(); - // For file handles, we've done what we can with flush() - // The JVM doesn't provide a portable way to force fsync - // Most implementations will sync on flush anyway - - // Note: If you need true fsync behavior, you would need to: - // 1. Add a sync() method to the IOHandle interface - // 2. Implement it in CustomFileChannel using FileChannel.force(true) - // 3. Call it here: fh.ioHandle.sync(); - - return new RuntimeList(new RuntimeScalar("0 but true")); + // Now call sync() to force fsync on the file descriptor + RuntimeScalar result = fh.ioHandle.sync(); + + if (result.getBoolean()) { + // Return "0 but true" on success per Perl convention + return new RuntimeList(new RuntimeScalar("0 but true")); + } else { + return new RuntimeList(); // undef on error + } } catch (Exception e) { RuntimeIO.handleIOError("sync failed: " + e.getMessage()); - return new RuntimeList(); + return new RuntimeList(); // undef on error } } diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java b/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java index fa695fcd2..6bc17b464 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java @@ -194,6 +194,7 @@ public static void initializeGlobals(CompilerOptions compilerOptions) { Encode.initialize(); JavaSystem.initialize(); PerlIO.initialize(); + IOHandle.initialize(); // IO::Handle methods (_sync, _error, etc.) Version.initialize(); // Initialize version module for version objects DynaLoader.initialize(); XSLoader.initialize(); // XSLoader will load other classes on-demand diff --git a/src/main/perl/lib/IO/Handle.pm b/src/main/perl/lib/IO/Handle.pm index 24dcd6883..9995dc694 100644 --- a/src/main/perl/lib/IO/Handle.pm +++ b/src/main/perl/lib/IO/Handle.pm @@ -47,13 +47,9 @@ use constant _IOFBF => 0; # Fully buffered use constant _IOLBF => 1; # Line buffered use constant _IONBF => 2; # Unbuffered -# Try to load Java backend if available -my $has_java_backend = 0; -eval { - require 'org.perlonjava.runtime.perlmodule.IOHandleModule'; - IOHandleInit(); - $has_java_backend = 1; -}; +# Check if Java backend methods are available (registered by IOHandle.initialize()) +# The _sync function is registered directly into IO::Handle namespace by Java code +our $has_java_backend = defined &IO::Handle::_sync; # Constructor sub new { From 9c096bd4f839fcd8a7316e7ff1f61220614e11ce Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 13 Mar 2026 15:51:40 +0100 Subject: [PATCH 2/2] Fix: Revert fileno() to return undef for CustomFileChannel The synthetic fileno implementation broke tests that relied on undef == undef comparison (io/perlio_leaks.t) and consistent fd numbers. Changes: - CustomFileChannel.fileno(): Return undef (matches master behavior) - IO/Handle.pm sync()/flush(): Remove `defined fileno()` checks since Java backend handles invalid handles internally Test results restored to master baseline: - io/perlio_leaks.t: 12/12 (was 0/12) - io/dup.t: 26/29 (was 17/29) - op/require_37033.t: 7/10 (unchanged) Performance still improved: ExifTool.t 3.2s (was 16.7s) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../java/org/perlonjava/runtime/io/CustomFileChannel.java | 8 ++++---- src/main/perl/lib/IO/Handle.pm | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java index bc39a3105..01dc2212d 100644 --- a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java @@ -338,10 +338,10 @@ public RuntimeScalar sync() { */ @Override public RuntimeScalar fileno() { - // Return a synthetic fd based on object identity, starting from 3 - // (0=stdin, 1=stdout, 2=stderr are reserved) - int syntheticFd = 3 + (System.identityHashCode(this) & 0x7FFFFFFF) % 1000000; - return new RuntimeScalar(syntheticFd); + // Java's FileChannel does not expose the underlying OS file descriptor. + // Return undef to match Perl's behavior for handles without a real fd. + // Note: Validity checks should be done in the Java backend, not via fileno(). + return RuntimeScalarCache.scalarUndef; } /** diff --git a/src/main/perl/lib/IO/Handle.pm b/src/main/perl/lib/IO/Handle.pm index 9995dc694..ab3c241f4 100644 --- a/src/main/perl/lib/IO/Handle.pm +++ b/src/main/perl/lib/IO/Handle.pm @@ -361,7 +361,8 @@ sub clearerr { sub sync { my $fh = shift; - return undef unless defined fileno($fh); + # Note: Don't check fileno() here - Java's FileChannel returns undef for fileno + # but the Java backend handles invalid handles internally in _sync() if ($has_java_backend) { return _sync($fh); @@ -375,8 +376,8 @@ sub sync { sub flush { my $fh = shift; - # First check if handle is valid - return undef unless defined fileno($fh); + # Note: Don't check fileno() here - Java's FileChannel returns undef for fileno + # The flush implementation works regardless of fileno value # Save and restore selected handle my $old_fh = select($fh);