-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathLayeredIOHandle.java
More file actions
509 lines (474 loc) · 18.5 KB
/
LayeredIOHandle.java
File metadata and controls
509 lines (474 loc) · 18.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
package org.perlonjava.runtime.io;
import org.perlonjava.runtime.runtimetypes.RuntimeScalar;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
/**
 * Implementation of Perl's layered IO system for filehandles.
 *
 * <p>This class provides a simplified implementation of Perl's PerlIO layer
 * system, allowing multiple transformations to be stacked on an IO handle.
 * It wraps an underlying {@link IOHandle} and applies a pipeline of layers
 * for input and output transformations.</p>
 *
 * <p>In Perl, layered IO allows you to stack multiple transformations:</p>
 * <pre>
 * # Open with multiple layers
 * open(my $fh, '&lt;:encoding(UTF-8):crlf', 'file.txt');
 *
 * # Change layers on existing handle
 * binmode($fh, ':raw');
 * binmode($fh, ':encoding(iso-8859-1):crlf');
 * </pre>
 *
 * <p>Key features of this implementation:</p>
 * <ul>
 * <li>Supports stacking multiple IO layers in order</li>
 * <li>Handles the layer names raw, bytes, unix, crlf, utf8 and encoding(name)</li>
 * <li>Maintains separate pipelines for input and output transformations</li>
 * <li>Resets layer state on {@link #binmode(String)}, {@link #seek(long, int)}
 *     and {@link #close()}</li>
 * </ul>
 *
 * <p>The layer pipeline is implemented using {@link Function} composition:
 * each newly added layer's transformation is appended (via
 * {@link Function#andThen(Function)}) to the transformations of the layers
 * added before it.</p>
 *
 * @see IOHandle
 * @see IOLayer
 */
public class LayeredIOHandle implements IOHandle {
    /** Largest request size passed to the delegate in a single layered read. */
    private static final int READ_CHUNK_SIZE = 128;

    /**
     * List of currently active layers, in the order they were added.
     * Maintained so the layers can be reset on seek, close and binmode.
     */
    public final List<IOLayer> activeLayers = new ArrayList<>();

    /**
     * The underlying IO handle that performs the actual read/write operations.
     */
    private final IOHandle delegate;

    /**
     * Characters already produced by the input pipeline that did not fit into
     * the caller's request. They are handed out first by the next layered read
     * so that no decoded data is dropped.
     */
    private final StringBuilder pendingInput = new StringBuilder();

    /**
     * The composed function pipeline for input transformations.
     * Each layer's processInput step is appended in the order layers are added.
     */
    private Function<String, String> inputPipeline = Function.identity();

    /**
     * The composed function pipeline for output transformations.
     * Each layer's processOutput step is appended in the order layers are added.
     */
    private Function<String, String> outputPipeline = Function.identity();

    /**
     * Constructs a new layered IO handle wrapping the given delegate.
     *
     * <p>Initially no layers are applied, so all operations pass through
     * to the delegate unchanged. Layers are installed with
     * {@link #binmode(String)}.</p>
     *
     * @param delegate the underlying IO handle to wrap
     * @throws NullPointerException if delegate is null
     */
    public LayeredIOHandle(IOHandle delegate) {
        // Fail at construction time, as documented, rather than on first use.
        this.delegate = Objects.requireNonNull(delegate, "delegate");
    }

    /**
     * Returns the underlying delegate handle.
     *
     * @return the wrapped IO handle
     */
    public IOHandle getDelegate() {
        return delegate;
    }

    /**
     * Writes data to the handle, applying all output layers.
     *
     * <p>The data is passed through the output pipeline, which applies the
     * layers' output transformations in the order the layers were added,
     * and the result is written to the delegate.</p>
     *
     * @param data the string data to write
     * @return the result of the delegate's write operation
     */
    @Override
    public RuntimeScalar write(String data) {
        return delegate.write(outputPipeline.apply(data));
    }

    /**
     * Reads data from the handle, applying all input layers.
     *
     * <p>With no active layers the call is forwarded to the delegate
     * unchanged. Otherwise the method repeatedly reads small chunks from the
     * delegate and feeds them through the input pipeline until
     * {@code maxBytes} characters have been collected, the delegate returns
     * an empty chunk, or the safety limit on consumed input is exhausted.
     * Reading in several chunks lets a layer complete a sequence that was
     * split across a chunk boundary.</p>
     *
     * <p>If the pipeline produces more characters than were requested, the
     * surplus is kept and returned by the next layered read.</p>
     *
     * @param maxBytes the amount to read; with active layers this is treated
     *                 as the number of characters wanted
     * @param charset  the character set, forwarded to the delegate's reads
     * @return a RuntimeScalar containing the data read (possibly empty)
     */
    @Override
    public RuntimeScalar doRead(int maxBytes, Charset charset) {
        // If no active layers, delegate directly (byte-based reading)
        if (activeLayers.isEmpty()) {
            return delegate.doRead(maxBytes, charset);
        }

        StringBuilder result = new StringBuilder();
        int charactersNeeded = maxBytes;

        // Hand out characters left over from a previous call first.
        if (charactersNeeded > 0 && pendingInput.length() > 0) {
            int fromPending = Math.min(pendingInput.length(), charactersNeeded);
            result.append(pendingInput, 0, fromPending);
            pendingInput.delete(0, fromPending);
            charactersNeeded -= fromPending;
        }

        // Bounds the total input consumed so the loop cannot spin forever.
        // Computed as long: maxBytes * 4 overflows int for large requests.
        long safetyLimit = (long) maxBytes * 4;

        while (charactersNeeded > 0 && safetyLimit > 0) {
            // Read only what we need, don't over-consume the delegate
            int bytesToRead = Math.min(READ_CHUNK_SIZE, charactersNeeded);
            String chunkStr = delegate.doRead(bytesToRead, charset).toString();
            if (chunkStr.isEmpty()) {
                break; // nothing more available from the delegate
            }
            safetyLimit -= chunkStr.length();

            String processed = inputPipeline.apply(chunkStr);
            if (processed.isEmpty()) {
                continue; // the layers are holding the data back; read more
            }

            int charsToTake = Math.min(processed.length(), charactersNeeded);
            result.append(processed, 0, charsToTake);
            charactersNeeded -= charsToTake;

            if (processed.length() > charsToTake) {
                // Keep the surplus for the next read instead of dropping it.
                pendingInput.append(processed, charsToTake, processed.length());
                break;
            }
        }
        return new RuntimeScalar(result.toString());
    }

    /**
     * Sets or changes the IO layers on this handle, similar to Perl's binmode().
     *
     * <p>The layer specification is parsed and the requested layers replace
     * any layers that were active before. On success the replaced layers are
     * reset and buffered input is discarded. If the specification cannot be
     * applied (for example because it names an unknown layer or encoding),
     * the previous layer configuration is restored and 0 is returned.</p>
     *
     * <p>Supported layer specifications:</p>
     * <ul>
     * <li><b>:raw</b>, <b>:bytes</b> or <b>:unix</b> - no transformation</li>
     * <li><b>:crlf</b> - CRLF/LF conversion layer</li>
     * <li><b>:utf8</b> - UTF-8 encoding layer</li>
     * <li><b>:encoding(name)</b> - specific character encoding layer</li>
     * </ul>
     *
     * <p>Examples:</p>
     * <pre>
     * handle.binmode(":raw");                   // Binary mode
     * handle.binmode(":utf8");                  // UTF-8 text mode
     * handle.binmode(":encoding(UTF-16):crlf"); // UTF-16 with CRLF conversion
     * </pre>
     *
     * @param modeStr the layer specification string (null or empty means no layers)
     * @return RuntimeScalar(1) on success, RuntimeScalar(0) on failure
     */
    public RuntimeScalar binmode(String modeStr) {
        // Snapshot the current configuration so a failed request can be undone.
        List<IOLayer> previousLayers = new ArrayList<>(activeLayers);
        Function<String, String> previousInput = inputPipeline;
        Function<String, String> previousOutput = outputPipeline;
        try {
            inputPipeline = Function.identity();
            outputPipeline = Function.identity();
            activeLayers.clear();

            parseAndSetLayers(modeStr);

            // The new configuration is in place; drop state tied to the old one.
            for (IOLayer layer : previousLayers) {
                layer.reset();
            }
            pendingInput.setLength(0);
            return new RuntimeScalar(1);
        } catch (Exception e) {
            // Boundary catch: report failure to the caller as 0 and leave the
            // handle exactly as it was configured before the call.
            activeLayers.clear();
            activeLayers.addAll(previousLayers);
            inputPipeline = previousInput;
            outputPipeline = previousOutput;
            return new RuntimeScalar(0);
        }
    }

    /**
     * Parses the layer specification string and applies the layers in order.
     *
     * @param modeStr the layer specification string; null or empty applies no layers
     * @throws IllegalArgumentException if an unknown layer or encoding is specified
     */
    private void parseAndSetLayers(String modeStr) {
        if (modeStr == null || modeStr.isEmpty()) {
            // Default to raw mode (no transformation)
            return;
        }
        for (String layer : splitLayers(modeStr)) {
            if (!layer.isEmpty()) {
                addLayer(layer);
            }
        }
    }

    /**
     * Splits a layer specification string into individual layer names.
     *
     * <p>Layers are separated by colons or whitespace; the separators are not
     * part of the returned names. An {@code encoding(...)} specification is
     * kept in one piece up to its closing parenthesis. For example:</p>
     * <ul>
     * <li>{@code ":raw:crlf"} yields {@code ["raw", "crlf"]}</li>
     * <li>{@code ":raw :utf8"} yields {@code ["raw", "utf8"]}</li>
     * <li>{@code ":encoding(UTF-8):crlf"} yields {@code ["encoding(UTF-8)", "crlf"]}</li>
     * <li>{@code "encoding(iso-8859-1)"} yields {@code ["encoding(iso-8859-1)"]}</li>
     * </ul>
     *
     * @param modeStr the layer specification string to split (not null)
     * @return array of individual layer specifications, without separators
     */
    private String[] splitLayers(String modeStr) {
        List<String> result = new ArrayList<>();
        int length = modeStr.length();
        int start = 0; // start of the layer name currently being scanned
        int i = 0;
        while (i < length) {
            if (isLayerSeparator(modeStr.charAt(i))) {
                if (i > start) {
                    result.add(modeStr.substring(start, i));
                }
                i++;
                start = i;
            } else if (modeStr.startsWith("encoding(", i)) {
                int closeIdx = modeStr.indexOf(')', i);
                if (closeIdx == -1) {
                    // Unterminated: scan on as ordinary characters.
                    i++;
                    continue;
                }
                // Emit whatever precedes encoding(...) as its own name
                if (i > start) {
                    result.add(modeStr.substring(start, i));
                }
                // Emit the complete encoding(...) specification
                result.add(modeStr.substring(i, closeIdx + 1));
                i = closeIdx + 1;
                start = i;
            } else {
                i++;
            }
        }
        // Add any remaining content
        if (start < length) {
            result.add(modeStr.substring(start));
        }
        return result.toArray(new String[0]);
    }

    /**
     * Tells whether a character separates two layer names.
     *
     * @param c the character to test
     * @return true for a colon or any whitespace character
     */
    private static boolean isLayerSeparator(char c) {
        return c == ':' || Character.isWhitespace(c);
    }

    /**
     * Adds a single layer to the IO handle.
     *
     * <p>Creates the layer instance matching the specification and installs
     * it. The names bytes, raw and unix install nothing.</p>
     *
     * @param layerSpec the layer specification without leading colon
     *                  (e.g., "crlf", "utf8", "encoding(UTF-16)")
     * @throws IllegalArgumentException if the layer or encoding is unknown
     */
    private void addLayer(String layerSpec) {
        switch (layerSpec) {
            case "bytes", "raw", "unix" -> {
                // No-op layers - binary mode with no transformation
            }
            case "crlf" -> {
                CrlfLayer layer = new CrlfLayer();
                installLayer(layer, s -> layer.processInput(s), s -> layer.processOutput(s));
            }
            case "utf8" -> {
                // Convenience alias for an encoding layer using UTF-8
                EncodingLayer layer = new EncodingLayer(StandardCharsets.UTF_8, "utf8");
                installLayer(layer, s -> layer.processInput(s), s -> layer.processOutput(s));
            }
            default -> {
                if (!(layerSpec.startsWith("encoding(") && layerSpec.endsWith(")"))) {
                    throw new IllegalArgumentException("Unknown layer: " + layerSpec);
                }
                // Extract the charset name from encoding(name)
                String charsetName = layerSpec.substring(9, layerSpec.length() - 1).trim();
                Charset charset;
                try {
                    charset = Charset.forName(charsetName);
                } catch (IllegalArgumentException e) {
                    // Covers illegal and unsupported charset names; keep the cause.
                    throw new IllegalArgumentException("Unknown encoding: " + charsetName, e);
                }
                EncodingLayer layer = new EncodingLayer(charset, layerSpec);
                installLayer(layer, s -> layer.processInput(s), s -> layer.processOutput(s));
            }
        }
    }

    /**
     * Registers a layer and appends its transformations to both pipelines.
     *
     * <p>NOTE(review): output transformations run in the same order as input
     * transformations (order of addition). Perl's PerlIO presumably runs
     * output through the layer stack in the opposite direction - confirm
     * whether the current order is intentional.</p>
     *
     * @param layer           the layer to track in {@link #activeLayers}
     * @param inputTransform  the layer's input transformation
     * @param outputTransform the layer's output transformation
     */
    private void installLayer(IOLayer layer,
                              Function<String, String> inputTransform,
                              Function<String, String> outputTransform) {
        activeLayers.add(layer);
        inputPipeline = inputPipeline.andThen(inputTransform);
        outputPipeline = outputPipeline.andThen(outputTransform);
    }

    /**
     * Resets every active layer and discards input buffered by this handle.
     */
    private void discardLayerState() {
        for (IOLayer layer : activeLayers) {
            layer.reset();
        }
        pendingInput.setLength(0);
    }

    /**
     * Flushes the underlying handle.
     *
     * <p>This method passes straight through to the delegate's flush method;
     * no layer-specific flushing is performed.</p>
     *
     * @return the result from the delegate's flush operation
     */
    @Override
    public RuntimeScalar flush() {
        return delegate.flush();
    }

    /**
     * Synchronizes data to physical storage by forwarding to the delegate.
     *
     * @return the result from the delegate's sync operation
     * @see IOHandle#sync()
     */
    @Override
    public RuntimeScalar sync() {
        return delegate.sync();
    }

    /**
     * Closes the handle and cleans up all layers.
     *
     * <p>This method:</p>
     * <ol>
     * <li>Flushes the handle</li>
     * <li>Resets all active layers and discards buffered input</li>
     * <li>Closes the underlying delegate handle</li>
     * </ol>
     *
     * @return the result from the delegate's close operation
     */
    @Override
    public RuntimeScalar close() {
        flush();
        discardLayerState();
        return delegate.close();
    }

    /**
     * Returns the file descriptor number of the underlying handle.
     *
     * @return the file descriptor number from the delegate
     */
    @Override
    public RuntimeScalar fileno() {
        return delegate.fileno();
    }

    /**
     * Checks if the end of file has been reached.
     *
     * <p>While this handle still holds buffered input from an earlier read,
     * a false value is returned because that data has not been delivered
     * yet. Otherwise the delegate's answer is returned.</p>
     *
     * @return a false scalar if buffered input remains, else the delegate's result
     */
    @Override
    public RuntimeScalar eof() {
        if (pendingInput.length() > 0) {
            return new RuntimeScalar(0);
        }
        return delegate.eof();
    }

    /**
     * Returns the current position as reported by the delegate.
     *
     * <p>The value is the delegate's position; it does not account for
     * layer transformations or for input buffered by this handle.</p>
     *
     * @return the current file position from the delegate
     */
    @Override
    public RuntimeScalar tell() {
        return delegate.tell();
    }

    /**
     * Seeks to a new position in the file.
     *
     * <p>All layers are reset and buffered input is discarded before the
     * seek, since seeking invalidates any partial sequences or other state
     * belonging to the old position.</p>
     *
     * @param pos    the position to seek to
     * @param whence the seek mode (SEEK_SET, SEEK_CUR, or SEEK_END)
     * @return the result from the delegate's seek operation
     */
    @Override
    public RuntimeScalar seek(long pos, int whence) {
        discardLayerState();
        return delegate.seek(pos, whence);
    }

    /**
     * Truncates the file to the specified length by forwarding to the delegate.
     *
     * @param length the new file length
     * @return the result from the delegate's truncate operation
     */
    @Override
    public RuntimeScalar truncate(long length) {
        return delegate.truncate(length);
    }

    /**
     * Describes the currently active layers as a Perl-style string.
     *
     * <p>Each {@link CrlfLayer} contributes {@code ":crlf"} and each
     * {@link EncodingLayer} contributes {@code ":encoding"}; other layer
     * types contribute nothing.</p>
     *
     * <p>NOTE(review): the encoding name is not included in the output -
     * confirm whether callers need it before extending the format.</p>
     *
     * @return the concatenated layer names, or an empty string if none apply
     */
    public String getCurrentLayers() {
        StringBuilder layers = new StringBuilder();
        for (IOLayer layer : this.activeLayers) {
            if (layer instanceof CrlfLayer) {
                layers.append(":crlf");
            } else if (layer instanceof EncodingLayer) {
                layers.append(":encoding");
            }
        }
        return layers.toString();
    }
}