-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathTextCsv.java
More file actions
257 lines (211 loc) · 8.61 KB
/
TextCsv.java
File metadata and controls
257 lines (211 loc) · 8.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
package org.perlonjava.runtime.perlmodule;
import org.apache.commons.csv.*;
import org.perlonjava.runtime.runtimetypes.*;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.*;
import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.JAVAOBJECT;
/**
 * Text::CSV module implementation for PerlOnJava.
 *
 * <p>Provides Java-backed {@code parse} and {@code combine} operations built on
 * Apache Commons CSV. Results and error state are stored on the Perl object
 * hash that is passed as the first argument of each call.
 */
public class TextCsv extends PerlModuleBase {

    // Error codes matching Perl's Text::CSV
    private static final int INI_SEPARATOR_CONFLICT = 1001;
    private static final int EIF_LOOSE_UNESCAPED_QUOTE = 2034;
    private static final int EIQ_QUOTED_FIELD_NOT_TERMINATED = 2027;
    private static final int ECB_BINARY_CHARACTER = 2110;

    /** Key in the object hash under which the built {@link CSVFormat} is cached. */
    private static final String CACHE_KEY = "_CSVFormat";

    /**
     * Constructor initializes the Text::CSV module.
     *
     * <p>Passes the module name {@code "Text::CSV"} and {@code false} to
     * {@code PerlModuleBase}; the meaning of the flag is defined by the base class.
     */
    public TextCsv() {
        super("Text::CSV", false);
    }

    /**
     * Initializes and registers all Text::CSV methods.
     *
     * NOTE: Registration is intentionally disabled because Text::CSV now
     * delegates to Text::CSV_XS (which inherits from Text::CSV_PP).
     * Registering Java-backed parse/combine on "Text::CSV" would override
     * the pure-Perl implementations inherited through the CPAN wrapper.
     */
    public static void initialize() {
        // No-op: Java-backed CSV methods are no longer used.
        // The CPAN Text::CSV wrapper + Text::CSV_PP handle everything.
    }

    /**
     * Parse a CSV line.
     *
     * <p>On success the parsed fields are stored under {@code _fields} (as an array
     * reference) and the input under {@code _string} in the object hash, and the
     * error state is cleared. Only the first record of the input is stored.
     *
     * @param args {@code args[0]} is the object (hash reference), {@code args[1]} the line to parse
     * @param ctx  calling context; not used by this implementation
     * @return a list holding a true scalar on success, a false scalar when fewer than
     *         two arguments are given, no record is found, or parsing fails
     */
    public static RuntimeList parse(RuntimeArray args, int ctx) {
        if (args.size() < 2) {
            return scalarFalse.getList();
        }

        RuntimeHash self = args.get(0).hashDeref();
        RuntimeScalar line = args.get(1);
        String text = line.toString();
        RuntimeArray fields = new RuntimeArray();

        if (text.isEmpty()) {
            // Perl Text::CSV treats an empty input string ("") as a single empty field
            RuntimeArray.push(fields, scalarEmptyString);
            storeParseResult(self, fields, line);
            return scalarTrue.getList();
        }

        try {
            // Build CSV format from attributes
            CSVFormat format = buildCSVFormat(self);

            // Parse the line; the parser is Closeable, so release it deterministically
            List<CSVRecord> records;
            try (CSVParser parser = CSVParser.parse(text, format)) {
                records = parser.getRecords();
            }
            if (records.isEmpty()) {
                return scalarFalse.getList();
            }

            // Both blank_is_undef and empty_is_undef map an empty field to undef here;
            // the options do not change while iterating, so read them once.
            boolean emptyAsUndef = self.get("blank_is_undef").getBoolean()
                    || self.get("empty_is_undef").getBoolean();

            for (String field : records.get(0)) {
                RuntimeScalar value;
                if (emptyAsUndef && field.isEmpty()) {
                    value = scalarUndef;
                } else {
                    value = new RuntimeScalar(field);
                }
                RuntimeArray.push(fields, value);
            }

            storeParseResult(self, fields, line);
            return scalarTrue.getList();
        } catch (IllegalArgumentException e) {
            // Commons CSV rejects conflicting sep/quote/escape settings with
            // IllegalArgumentException; report that as a configuration conflict
            // rather than as malformed input.
            setError(self, INI_SEPARATOR_CONFLICT, e.getMessage(), 0, 0);
            return scalarFalse.getList();
        } catch (Exception e) {
            setError(self, EIQ_QUOTED_FIELD_NOT_TERMINATED, e.getMessage(), 0, 0);
            return scalarFalse.getList();
        }
    }

    /**
     * Records a successful parse on the object: stores the fields and the source
     * line, then clears the error state.
     */
    private static void storeParseResult(RuntimeHash self, RuntimeArray fields, RuntimeScalar line) {
        self.put("_fields", fields.createReference());
        self.put("_string", line);
        clearError(self);
    }

    /**
     * Combine fields into a CSV string.
     *
     * <p>Undefined fields are written as empty strings. On success the resulting
     * string (with trailing CR/LF characters removed) is stored under
     * {@code _string} in the object hash and the error state is cleared.
     *
     * @param args {@code args[0]} is the object (hash reference), the remaining
     *             elements are the field values
     * @param ctx  calling context; not used by this implementation
     * @return a list holding a true scalar on success, a false scalar when fewer than
     *         two arguments are given or generation fails
     */
    public static RuntimeList combine(RuntimeArray args, int ctx) {
        if (args.size() < 2) {
            return scalarFalse.getList();
        }

        RuntimeHash self = args.get(0).hashDeref();

        try {
            // Build CSV format
            CSVFormat format = buildCSVFormat(self);

            // Get fields from arguments (index 0 is the object itself)
            List<String> values = new ArrayList<>(args.size() - 1);
            for (int i = 1; i < args.size(); i++) {
                RuntimeScalar field = args.get(i);
                values.add(field.type == RuntimeScalarType.UNDEF ? "" : field.toString());
            }

            // Generate CSV string; the printer is Closeable, so release it deterministically
            StringWriter sw = new StringWriter();
            try (CSVPrinter printer = new CSVPrinter(sw, format)) {
                printer.printRecord(values);
                printer.flush();
            }

            // Remove any trailing line terminators (handles \n, \r\n, or \r)
            String csvString = sw.toString().replaceAll("[\r\n]+$", "");

            self.put("_string", new RuntimeScalar(csvString));
            clearError(self);
            return scalarTrue.getList();
        } catch (IllegalArgumentException e) {
            // Conflicting sep/quote/escape settings rejected by Commons CSV
            setError(self, INI_SEPARATOR_CONFLICT, e.getMessage(), 0, 0);
            return scalarFalse.getList();
        } catch (Exception e) {
            setError(self, ECB_BINARY_CHARACTER, e.getMessage(), 0, 0);
            return scalarFalse.getList();
        }
    }

    /**
     * Build CSVFormat from attributes.
     *
     * <p>Reads {@code sep_char}, {@code quote_char}, {@code escape_char},
     * {@code allow_whitespace} and {@code always_quote} from the object hash.
     * The built format is cached in the hash under {@code _CSVFormat} and reused
     * by later calls; this method does not itself invalidate that cache when
     * attributes change.
     *
     * @param self the object hash holding the CSV attributes
     * @return the cached or newly built format
     * @throws IllegalArgumentException if Commons CSV rejects the attribute combination
     */
    private static CSVFormat buildCSVFormat(RuntimeHash self) {
        RuntimeScalar cached = self.get(CACHE_KEY);
        if (cached != null && cached.type == JAVAOBJECT && cached.value instanceof CSVFormat) {
            return (CSVFormat) cached.value;
        }

        // Start with RFC4180 format which handles quote doubling correctly
        CSVFormat.Builder builder = CSVFormat.RFC4180.builder();

        // Set delimiter (only single-character separators are applied)
        String sepChar = self.get("sep_char").toString();
        if (sepChar.length() == 1) {
            builder.setDelimiter(sepChar.charAt(0));
        }

        // Set quote character: undef disables quoting, a single character replaces
        // the default, anything else keeps the RFC4180 default
        RuntimeScalar quoteChar = self.get("quote_char");
        if (quoteChar.type == RuntimeScalarType.UNDEF) {
            builder.setQuote(null);
        } else if (quoteChar.toString().length() == 1) {
            builder.setQuote(quoteChar.toString().charAt(0));
        }

        // Handle escape character properly.
        // When escape_char is undef, empty, or equal to quote_char, no escape
        // character is set so that quote doubling works.
        RuntimeScalar escapeChar = self.get("escape_char");
        boolean distinctEscape = false;
        String escapeStr = null;
        if (escapeChar.type != RuntimeScalarType.UNDEF) {
            escapeStr = escapeChar.toString();
            String quoteStr = quoteChar.toString();
            distinctEscape = escapeStr.length() == 1 && !escapeStr.equals(quoteStr);
        }
        if (distinctEscape) {
            builder.setEscape(escapeStr.charAt(0));
        } else {
            builder.setEscape(null);
        }

        // Handle other options
        if (self.get("allow_whitespace").getBoolean()) {
            builder.setIgnoreSurroundingSpaces(true);
        }
        if (self.get("always_quote").getBoolean()) {
            builder.setQuoteMode(QuoteMode.ALL);
        }

        // Don't set record separator for parsing single lines
        builder.setRecordSeparator(null);

        // Use get() instead of deprecated build()
        CSVFormat csvFormat = builder.get();
        if (cached != null) {
            cached.set(csvFormat);
        }
        return csvFormat;
    }

    /**
     * Set error information using Perl calling convention.
     *
     * <p>Invokes the Perl sub {@code Text::CSV::_set_error} in scalar context with
     * the arguments (object reference, code, message, pos, field).
     *
     * @param self    the object hash the error belongs to
     * @param code    numeric error code
     * @param message error text
     * @param pos     position value passed through to the Perl sub
     * @param field   field value passed through to the Perl sub
     */
    private static void setError(RuntimeHash self, int code, String message, int pos, int field) {
        RuntimeArray callArgs = new RuntimeArray();
        RuntimeArray.push(callArgs, self.createReference());
        RuntimeArray.push(callArgs, new RuntimeScalar(code));
        RuntimeArray.push(callArgs, new RuntimeScalar(message));
        RuntimeArray.push(callArgs, new RuntimeScalar(pos));
        RuntimeArray.push(callArgs, new RuntimeScalar(field));

        // Call the Perl method
        RuntimeCode.apply(
                GlobalVariable.getGlobalCodeRef("Text::CSV::_set_error"),
                callArgs,
                RuntimeContextType.SCALAR
        );
    }

    /**
     * Clear error state using Perl calling convention.
     *
     * <p>Invokes the Perl sub {@code Text::CSV::_clear_error} in scalar context
     * with the object reference as its only argument.
     *
     * @param self the object hash whose error state is cleared
     */
    private static void clearError(RuntimeHash self) {
        RuntimeArray callArgs = new RuntimeArray();
        RuntimeArray.push(callArgs, self.createReference());

        // Call the Perl method
        RuntimeCode.apply(
                GlobalVariable.getGlobalCodeRef("Text::CSV::_clear_error"),
                callArgs,
                RuntimeContextType.SCALAR
        );
    }
}