|
private static class IcebergToArrowTypeConverter extends TypeUtil.SchemaVisitor<Field> { |
|
private final NestedField currentField; |
|
|
|
IcebergToArrowTypeConverter(NestedField field) { |
|
this.currentField = field; |
|
} |
|
|
|
@Override |
|
public Field schema(org.apache.iceberg.Schema schema, Field structResult) { |
|
return structResult; |
|
} |
|
|
|
@Override |
|
public Field struct(StructType struct, List<Field> fieldResults) { |
|
return new Field( |
|
currentField.name(), |
|
new FieldType(currentField.isOptional(), ArrowType.Struct.INSTANCE, null), |
|
convertChildren(struct.fields())); |
|
} |
|
|
|
@Override |
|
public Field field(NestedField field, Field fieldResult) { |
|
return fieldResult; |
|
} |
|
|
|
@Override |
|
public Field list(ListType list, Field elementResult) { |
|
return new Field( |
|
currentField.name(), |
|
new FieldType(currentField.isOptional(), ArrowType.List.INSTANCE, null), |
|
convertChildren(list.fields())); |
|
} |
|
|
|
@Override |
|
public Field map(MapType map, Field keyResult, Field valueResult) { |
|
Map<String, String> metadata = ImmutableMap.of(ORIGINAL_TYPE, MAP_TYPE); |
|
ArrowType arrowType = new ArrowType.Map(false); |
|
|
|
List<Field> entryFields = convertChildren(map.fields()); |
|
|
|
Field entry = |
|
new Field("", new FieldType(currentField.isOptional(), arrowType, null), entryFields); |
|
List<Field> children = Lists.newArrayList(entry); |
|
|
|
return new Field( |
|
currentField.name(), |
|
new FieldType(currentField.isOptional(), arrowType, null, metadata), |
|
children); |
|
} |
|
|
|
private List<Field> convertChildren(Collection<NestedField> children) { |
|
List<Field> converted = Lists.newArrayListWithCapacity(children.size()); |
|
|
|
for (NestedField child : children) { |
|
converted.add(TypeUtil.visit(child.type(), new IcebergToArrowTypeConverter(child))); |
|
} |
|
|
|
return converted; |
|
} |
|
|
|
@Override |
|
public Field primitive(Type.PrimitiveType primitive) { |
|
final ArrowType arrowType; |
|
|
|
switch (primitive.typeId()) { |
|
case BINARY: |
|
arrowType = ArrowType.Binary.INSTANCE; |
|
break; |
|
case FIXED: |
|
final Types.FixedType fixedType = (Types.FixedType) primitive; |
|
arrowType = new ArrowType.FixedSizeBinary(fixedType.length()); |
|
break; |
|
case BOOLEAN: |
|
arrowType = ArrowType.Bool.INSTANCE; |
|
break; |
|
case INTEGER: |
|
arrowType = new ArrowType.Int(Integer.SIZE, true /* signed */); |
|
break; |
|
case LONG: |
|
arrowType = new ArrowType.Int(Long.SIZE, true /* signed */); |
|
break; |
|
case FLOAT: |
|
arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); |
|
break; |
|
case DOUBLE: |
|
arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); |
|
break; |
|
case DECIMAL: |
|
final Types.DecimalType decimalType = (Types.DecimalType) primitive; |
|
arrowType = new ArrowType.Decimal(decimalType.precision(), decimalType.scale(), 128); |
|
break; |
|
case STRING: |
|
arrowType = ArrowType.Utf8.INSTANCE; |
|
break; |
|
case TIME: |
|
arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, Long.SIZE); |
|
break; |
|
case UUID: |
|
arrowType = new ArrowType.FixedSizeBinary(16); |
|
break; |
|
case TIMESTAMP: |
|
arrowType = |
|
new ArrowType.Timestamp( |
|
TimeUnit.MICROSECOND, |
|
((Types.TimestampType) primitive).shouldAdjustToUTC() ? "UTC" : null); |
|
break; |
|
case TIMESTAMP_NANO: |
|
arrowType = |
|
new ArrowType.Timestamp( |
|
TimeUnit.NANOSECOND, |
|
((Types.TimestampNanoType) primitive).shouldAdjustToUTC() ? "UTC" : null); |
|
break; |
|
case DATE: |
|
arrowType = new ArrowType.Date(DateUnit.DAY); |
|
break; |
|
default: |
|
throw new UnsupportedOperationException("Unsupported primitive type: " + primitive); |
|
} |
|
|
|
return new Field( |
|
currentField.name(), |
|
new FieldType(currentField.isOptional(), arrowType, null), |
|
Lists.newArrayList()); |
|
} |
|
} |
Feature Request / Improvement
Inspired by apache/iceberg-python#3098, which documents the mapping between pyarrow and pyiceberg types.
I think it's useful to document the Iceberg type <> Arrow type mapping as well.
Maybe we could just put the following conversion logic into writing:
iceberg/arrow/src/main/java/org/apache/iceberg/arrow/ArrowSchemaUtil.java
Lines 68 to 192 in c9f6c84
Query engine
None
Willingness to contribute