Skip to content

Commit 6e6b4aa

Browse files
committed
update
1 parent f327623 commit 6e6b4aa

2 files changed

Lines changed: 10 additions & 56 deletions

File tree

parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -83,26 +83,12 @@ public class AvroSchemaConverter {
8383

8484
public static final String ADD_LIST_ELEMENT_RECORDS = "parquet.avro.add-list-element-records";
8585
private static final boolean ADD_LIST_ELEMENT_RECORDS_DEFAULT = true;
86-
public static final String AVRO_MAX_RECURSION = "parquet.avro.max-recursion";
87-
private static final int AVRO_MAX_RECURSION_DEFAULT = 10;
8886

8987
private final boolean assumeRepeatedIsListElement;
9088
private final boolean writeOldListStructure;
9189
private final boolean writeParquetUUID;
9290
private final boolean readInt96AsFixed;
9391
private final Set<String> pathsToInt96;
94-
private final int maxRecursion;
95-
96-
/**
97-
* Sets the maximum recursion depth for recursive schemas.
98-
*
99-
* @param config The hadoop configuration to be updated.
100-
* @param maxRecursion The maximum recursion depth schemas are allowed to go before terminating
101-
* with an UnsupportedOperationException instead of their actual schema.
102-
*/
103-
public static void setMaxRecursion(Configuration config, int maxRecursion) {
104-
config.setInt(AVRO_MAX_RECURSION, maxRecursion);
105-
}
10692

10793
public AvroSchemaConverter() {
10894
this(ADD_LIST_ELEMENT_RECORDS_DEFAULT, READ_INT96_AS_FIXED_DEFAULT);
@@ -121,7 +107,6 @@ public AvroSchemaConverter() {
121107
this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT;
122108
this.readInt96AsFixed = readInt96AsFixed;
123109
this.pathsToInt96 = Collections.emptySet();
124-
this.maxRecursion = AVRO_MAX_RECURSION_DEFAULT;
125110
}
126111

127112
public AvroSchemaConverter(Configuration conf) {
@@ -134,7 +119,6 @@ public AvroSchemaConverter(ParquetConfiguration conf) {
134119
this.writeParquetUUID = conf.getBoolean(WRITE_PARQUET_UUID, WRITE_PARQUET_UUID_DEFAULT);
135120
this.readInt96AsFixed = conf.getBoolean(READ_INT96_AS_FIXED, READ_INT96_AS_FIXED_DEFAULT);
136121
this.pathsToInt96 = new HashSet<>(Arrays.asList(conf.getStrings(WRITE_FIXED_AS_INT96, new String[0])));
137-
this.maxRecursion = conf.getInt(AVRO_MAX_RECURSION, AVRO_MAX_RECURSION_DEFAULT);
138122
}
139123

140124
/**
@@ -213,13 +197,13 @@ private Type convertField(
213197
LogicalType logicalType = schema.getLogicalType();
214198

215199
if (type.equals(Schema.Type.RECORD) || type.equals(Schema.Type.ENUM) || type.equals(Schema.Type.FIXED)) {
216-
Integer depth = seenSchemas.get(schema);
217-
if (depth != null && depth >= maxRecursion) {
218-
throw new UnsupportedOperationException("Recursive Avro schemas are not supported by parquet-avro: "
219-
+ schema.getFullName() + " (exceeded maximum recursion depth of " + maxRecursion + ")");
200+
// If this schema has already been seen in the current branch, we have a recursion loop
201+
if (seenSchemas.containsKey(schema)) {
202+
throw new UnsupportedOperationException(
203+
"Recursive Avro schemas are not supported by parquet-avro: " + schema.getFullName());
220204
}
221205
seenSchemas = new IdentityHashMap<>(seenSchemas);
222-
seenSchemas.put(schema, depth == null ? 1 : depth + 1);
206+
seenSchemas.put(schema, 1);
223207
}
224208

225209
Types.PrimitiveBuilder<PrimitiveType> builder;

parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,7 @@ public void testRecursiveSchemaThrowsException() {
980980
Schema recursiveSchema = new Schema.Parser().parse(recursiveSchemaJson);
981981

982982
assertThrows(
983-
"Recursive Avro schema should throw UnsupportedOperationException",
983+
"Recursive Avro schema should throw UnsupportedOperationException for cycles",
984984
UnsupportedOperationException.class,
985985
() -> new AvroSchemaConverter().convert(recursiveSchema));
986986
}
@@ -1011,7 +1011,7 @@ public void testRecursiveSchemaFromGitHubIssue() {
10111011
Schema issueSchema = new Schema.Parser().parse(issueSchemaJson);
10121012

10131013
assertThrows(
1014-
"Schema should throw UnsupportedOperationException",
1014+
"Schema hould throw UnsupportedOperationException for cycles",
10151015
UnsupportedOperationException.class,
10161016
() -> new AvroSchemaConverter().convert(issueSchema));
10171017
}
@@ -1025,41 +1025,11 @@ public void testRecursiveSchemaErrorMessage() {
10251025

10261026
Schema recursiveSchema = new Schema.Parser().parse(recursiveSchemaJson);
10271027

1028-
try {
1029-
new AvroSchemaConverter().convert(recursiveSchema);
1030-
Assert.fail("Expected UnsupportedOperationException");
1031-
} catch (UnsupportedOperationException e) {
1032-
String message = e.getMessage();
1033-
Assert.assertTrue(
1034-
"Error message should mention recursion",
1035-
message.contains("Recursive Avro schemas are not supported"));
1036-
Assert.assertTrue("Error message should mention schema name", message.contains("TestRecord"));
1037-
Assert.assertTrue(
1038-
"Error message should mention max recursion depth", message.contains("maximum recursion depth"));
1039-
}
1040-
}
1041-
1042-
@Test
1043-
public void testConfigurableMaxRecursion() {
1044-
String recursiveSchemaJson = "{"
1045-
+ "\"type\": \"record\", \"name\": \"Node\", \"fields\": ["
1046-
+ " {\"name\": \"child\", \"type\": [\"null\", \"Node\"], \"default\": null}"
1047-
+ "]}";
1048-
1049-
Schema recursiveSchema = new Schema.Parser().parse(recursiveSchemaJson);
1050-
Configuration conf = new Configuration();
1051-
1052-
AvroSchemaConverter.setMaxRecursion(conf, 1);
1028+
// With our cycle detection fix, this should throw UnsupportedOperationException
10531029
assertThrows(
1054-
"Should fail with max recursion 1",
1030+
"Recursive schema should throw UnsupportedOperationException with clear error message",
10551031
UnsupportedOperationException.class,
1056-
() -> new AvroSchemaConverter(conf).convert(recursiveSchema));
1057-
1058-
AvroSchemaConverter.setMaxRecursion(conf, 5);
1059-
assertThrows(
1060-
"Should fail with max recursion 5",
1061-
UnsupportedOperationException.class,
1062-
() -> new AvroSchemaConverter(conf).convert(recursiveSchema));
1032+
() -> new AvroSchemaConverter().convert(recursiveSchema));
10631033
}
10641034

10651035
@Test

0 commit comments

Comments
 (0)