From 9f70011f276572d1a62f4c90f607e1b293f0998b Mon Sep 17 00:00:00 2001 From: Cedric Holzer Date: Sun, 12 Apr 2026 22:15:15 +0200 Subject: [PATCH] AVRO-4238: [java] Fix adding union fields with array default value When FastReader was enabled and a field of type Union with an Array as default value was added, an AvroRuntimeException occurred during Schema Evolution. This change resolves this bug in FastReaderBuilder.java, allowing the Schema Migration to succeed as specified. --- .../org/apache/avro/io/FastReaderBuilder.java | 11 +++- ...estReadingWritingDataInEvolvedSchemas.java | 51 ++++++++++++------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java b/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java index 7ff80e2a4f5..a17b2aa32b0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java @@ -197,7 +197,16 @@ private ExecutionStep getDefaultingStep(Schema.Field field) throws IOException { } else if (defaultValue instanceof Utf8) { return createFieldSetter(field, reusingReader((old, d) -> readUtf8(old, (Utf8) defaultValue))); } else if (defaultValue instanceof List && ((List) defaultValue).isEmpty()) { - return createFieldSetter(field, reusingReader((old, d) -> data.newArray(old, 0, field.schema()))); + Schema arraySchema = field.schema(); + if (arraySchema.getType() == Schema.Type.UNION) { + arraySchema = arraySchema.getTypes().stream() + .filter(nestedSchema -> nestedSchema.getType() == Schema.Type.ARRAY).findFirst() + .orElseThrow(() -> new AvroTypeException(String.format( + "Union schema %s has a default value of type Array, but none of the union types is of type Array", + field.schema().toString()))); + } + final Schema schema = arraySchema; + return createFieldSetter(field, reusingReader((old, d) -> data.newArray(old, 0, schema))); } else if (defaultValue instanceof Map && ((Map) defaultValue).isEmpty()) { return createFieldSetter(field, reusingReader((old, d) -> data.newMap(old, 0))); } else { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 2b392eb4efd..e72868a5bd7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -17,17 +17,14 @@ */ package org.apache.avro; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collection; -import java.util.stream.Stream; + +import static java.util.Collections.emptyList; +import static org.junit.jupiter.api.Assertions.*; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.EnumSymbol; @@ -41,7 +38,6 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; @@ -122,6 +118,11 @@ public class TestReadingWritingDataInEvolvedSchemas { .name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault() // .endRecord(); + private static final Schema UNION_WITH_EMPTY_ARRAY_DEFAULT_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().array().items(INT_RECORD).and().nullType().endUnion().arrayDefault(emptyList()) // + .endRecord(); + enum EncoderType { BINARY, JSON } @@ -232,9 +233,9 @@ void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema(EncoderType encoder Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0); byte[] encoded = encodeGenericBlob(record, encoderType); - AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + AvroTypeException exception = assertThrows(AvroTypeException.class, () -> decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType)); - Assertions.assertEquals("Found double, expecting float", exception.getMessage()); + assertEquals("Found double, expecting float", exception.getMessage()); } @ParameterizedTest @@ -243,9 +244,9 @@ void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema(EncoderType encoderTy Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0f); byte[] encoded = encodeGenericBlob(record, encoderType); - AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + AvroTypeException exception = assertThrows(AvroTypeException.class, () -> decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType)); - Assertions.assertEquals("Found float, expecting long", exception.getMessage()); + assertEquals("Found float, expecting long", exception.getMessage()); } @ParameterizedTest @@ -254,9 +255,9 @@ void longWrittenWithUnionSchemaIsNotConvertedToIntSchema(EncoderType encoderType Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); byte[] encoded = encodeGenericBlob(record, encoderType); - AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + AvroTypeException exception = assertThrows(AvroTypeException.class, () -> decodeGenericBlob(INT_RECORD, writer, encoded, encoderType)); - Assertions.assertEquals("Found long, expecting int", exception.getMessage()); + assertEquals("Found long, expecting int", exception.getMessage()); } @ParameterizedTest @@ -342,9 +343,9 @@ void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed(EncoderType enco Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "C")); byte[] encoded = encodeGenericBlob(record, encoderType); - AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + AvroTypeException exception = assertThrows(AvroTypeException.class, () -> decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType)); - Assertions.assertEquals("No match for C", exception.getMessage()); + assertEquals("No match for C", exception.getMessage()); } @ParameterizedTest @@ -363,9 +364,9 @@ void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData(Enc assertEquals(42, decoded.get(FIELD_A)); try { decoded.get("newTopField"); - Assertions.fail("get should throw a exception"); + fail("get should throw a exception"); } catch (AvroRuntimeException ex) { - Assertions.assertEquals("Not a valid schema field: newTopField", ex.getMessage()); + assertEquals("Not a valid schema field: newTopField", ex.getMessage()); } } @@ -379,9 +380,9 @@ void readerWithoutDefaultValueThrowsException(EncoderType encoderType) throws Ex .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); byte[] encoded = encodeGenericBlob(record, encoderType); - AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + AvroTypeException exception = assertThrows(AvroTypeException.class, () -> decodeGenericBlob(reader, INT_RECORD, encoded, encoderType)); - Assertions.assertTrue(exception.getMessage().contains("missing required field newField"), exception.getMessage()); + assertTrue(exception.getMessage().contains("missing required field newField"), exception.getMessage()); } @ParameterizedTest @@ -399,6 +400,18 @@ void readerWithDefaultValueIsApplied(EncoderType encoderType) throws Exception { assertEquals(314, decoded.get("newFieldWithDefault")); } + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithEmptyListAsDefaultValueForUnionFieldIsApplied(EncoderType encoderType) throws Exception { + Schema writer = SchemaBuilder.record(RECORD_A) // + .fields() // + .endRecord(); + Record record = new GenericData.Record(writer); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_WITH_EMPTY_ARRAY_DEFAULT_RECORD, writer, encoded, encoderType); + assertEquals(emptyList(), decoded.get(FIELD_A)); + } + @ParameterizedTest @EnumSource(EncoderType.class) void aliasesInSchema(EncoderType encoderType) throws Exception {