Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ protarrow.ProtarrowConfig(
)
```

## Date range limitation

`google.type.Date` is converted through Python's `datetime.date`, which only supports dates from
`0001-01-01` to `9999-12-31`. Proto `Date` values with `year=0` (e.g. the default `Date(year=0, month=0, day=0)`)
fall outside this range and cannot be represented as `datetime.date`. These values are stored as a special
sentinel `date32` value (`-719163`, one day before `0001-01-01`) and will round-trip back as
`Date(year=0, month=0, day=0)` regardless of the original month and day.

## Nullability

By default, nullability follows the convention imposed by protobuf:
Expand Down
15 changes: 10 additions & 5 deletions protarrow/arrow_to_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@
from google.type.date_pb2 import Date
from google.type.timeofday_pb2 import TimeOfDay

from protarrow.common import M, is_binary_enum, is_string_enum, offset_values_array
from protarrow.common import (
_INVALID_DATE_SENTINEL,
M,
is_binary_enum,
is_string_enum,
offset_values_array,
)

# Number of nanoseconds in one unit, keyed by the time-unit abbreviation.
_NANOS_PER_UNIT = {"ns": 1, "us": 1_000, "ms": 1_000_000, "s": 1_000_000_000}
_TIME_CONVERTER = {
Expand Down Expand Up @@ -59,11 +65,10 @@ def _timestamp_s_scalar_to_proto(scalar: pa.TimestampScalar) -> Timestamp:


def _date_scalar_to_proto(scalar: pa.Date32Scalar) -> Date:
    """Convert an Arrow ``date32`` scalar into a ``google.type.Date``.

    Args:
        scalar: The ``date32`` scalar to convert.

    Returns:
        An empty ``Date()`` (year, month and day all zero) when the scalar
        holds ``_INVALID_DATE_SENTINEL``; otherwise a ``Date`` carrying the
        year, month and day of the scalar's calendar date.
    """
    # Compare the raw day count first: the sentinel is checked before
    # ``as_py()`` is ever called, so no ``datetime.date`` is built for it.
    if scalar.value == _INVALID_DATE_SENTINEL:
        return Date()
    date: datetime.date = scalar.as_py()
    return Date(year=date.year, month=date.month, day=date.day)


def _time_64_ns_scalar_to_proto(scalar: pa.Time64Scalar) -> TimeOfDay:
Expand Down
2 changes: 2 additions & 0 deletions protarrow/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

M = TypeVar("M", bound=Message)

# date32 day count (days since 1970-01-01) used to encode proto ``Date``
# values that have no ``datetime.date`` equivalent. It is one day before
# ``datetime.date.min`` (0001-01-01 is -719162), so it cannot collide with a
# representable date. It also equals ``-datetime.date(1970, 1, 1).toordinal()``,
# which lets it double as the ordinal -> epoch-days offset.
_INVALID_DATE_SENTINEL = -719163

SUPPORTED_ENUM_TYPES = (
pa.int32(),
pa.binary(),
Expand Down
17 changes: 12 additions & 5 deletions protarrow/proto_to_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@
from google.type.date_pb2 import Date
from google.type.timeofday_pb2 import TimeOfDay

from protarrow.common import M, ProtarrowConfig, is_binary_enum, is_string_enum
from protarrow.common import (
_INVALID_DATE_SENTINEL,
M,
ProtarrowConfig,
is_binary_enum,
is_string_enum,
)

_PROTO_DESCRIPTOR_TO_PYARROW = {
BoolValue.DESCRIPTOR: pa.bool_(),
Expand Down Expand Up @@ -89,15 +95,16 @@ def _time_of_day_to_seconds(time_of_day: TimeOfDay) -> int:
return (time_of_day.hours * 60 + time_of_day.minutes) * 60 + time_of_day.seconds


def _proto_date_to_date32(proto_date: Date) -> int:
    """Convert a ``google.type.Date`` into a ``date32`` day count.

    Args:
        proto_date: The proto date to convert.

    Returns:
        ``_INVALID_DATE_SENTINEL`` when ``proto_date.year`` is 0 (month and
        day are ignored in that case); otherwise the number of days between
        1970-01-01 and the given date.

    Raises:
        ValueError: Propagated from ``datetime.date`` when the year is
            non-zero but year/month/day do not form a valid calendar date.
    """
    # Year 0 has no ``datetime.date`` equivalent; encode it as the sentinel.
    if proto_date.year == 0:
        return _INVALID_DATE_SENTINEL
    date = datetime.date(proto_date.year, proto_date.month, proto_date.day)
    # The sentinel equals minus the ordinal of 1970-01-01, so adding it
    # rebases the proleptic ordinal onto days-since-epoch.
    return date.toordinal() + _INVALID_DATE_SENTINEL


_PROTO_DESCRIPTOR_TO_ARROW_CONVERTER = {
Date.DESCRIPTOR: _proto_date_to_py_date,
Date.DESCRIPTOR: _proto_date_to_date32,
TimeOfDay.DESCRIPTOR: _time_of_day_to_nanos,
BoolValue.DESCRIPTOR: operator.attrgetter("value"),
BytesValue.DESCRIPTOR: operator.attrgetter("value"),
Expand Down
2 changes: 1 addition & 1 deletion tests/random_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def random_duration() -> Duration:


def random_date() -> Date:
    """Return a random ``Date`` within the range ``datetime.date`` supports.

    The ordinal is drawn uniformly (inclusive on both ends) between
    ``datetime.date.min`` and ``datetime.date.max``, so both boundary dates
    can be produced.
    """
    # Derive the bounds from ``datetime.date`` instead of hard-coding the
    # ordinals 1 and 3652059.
    ordinal = random.randint(
        datetime.date.min.toordinal(),
        datetime.date.max.toordinal(),
    )
    date = datetime.date.fromordinal(ordinal)
    return Date(year=date.year, month=date.month, day=date.day)


Expand Down
57 changes: 57 additions & 0 deletions tests/test_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pytest
from google.protobuf.descriptor import Descriptor, EnumDescriptor, FieldDescriptor
from google.protobuf.wrappers_pb2 import BoolValue, DoubleValue
from google.type.date_pb2 import Date

import protarrow
from protarrow import cast_record_batch
Expand Down Expand Up @@ -575,3 +576,59 @@ def test_dict_binary_enum_with_large_binary(self):
binary_type=pa.large_binary(),
)
assert config.enum_type == pa.dictionary(pa.int32(), pa.binary())


def test_date_behavior():
    """Pin the pyarrow and ``datetime`` facts that the date handling relies on."""
    date32 = pa.date32()

    # date32 values are day counts relative to 1970-01-01.
    assert pa.scalar(0, date32).as_py() == datetime.date(1970, 1, 1)
    assert pa.scalar(-1, date32).as_py() == datetime.date(1969, 12, 31)
    # -719162 is the smallest value that still converts to a Python date ...
    assert pa.scalar(-719162, date32).as_py() == datetime.date(1, 1, 1)
    # ... and one day earlier can no longer be converted.
    with pytest.raises(OverflowError, match=r"date value out of range"):
        assert pa.scalar(-719163, date32).as_py()

    # Proleptic ordinals of the epoch and of the datetime.date bounds.
    epoch = datetime.date(1970, 1, 1)
    assert epoch.toordinal() == 719163
    assert datetime.date.min.toordinal() == 1
    assert datetime.date.max.toordinal() == 3652059

    # fromordinal() is the inverse at both bounds.
    assert datetime.date.fromordinal(1) == datetime.date.min
    assert datetime.date.fromordinal(3652059) == datetime.date.max


def test_bad_date():
    """A default ``Date()`` is distinct from the minimum date 0001-01-01."""
    unset = Date()
    earliest = Date(year=1, month=1, day=1)

    assert unset != earliest


def test_can_pass_min_max_date():
    """Round-trip default, year-zero, minimum and maximum dates through Arrow."""
    default_date = Date()
    bad_date = Date(year=0, month=1, day=1)
    min_date = Date(year=1, month=1, day=1)
    max_date = Date(year=9999, month=12, day=31)

    source_dates = [default_date, bad_date, min_date, max_date]
    table = protarrow.messages_to_table(
        [ExampleMessage(date_value=value) for value in source_dates],
        ExampleMessage,
    )

    # Both year-zero inputs are stored as the same day count, one below the
    # value used for 0001-01-01.
    expected_days = pa.array(
        [-719163, -719163, -719162, 2932896],
        type=pa.date32(),
    )
    assert table["date_value"] == pa.chunked_array(expected_days)

    messages_back = protarrow.table_to_messages(table, ExampleMessage)
    # The year-zero date loses its month/day and comes back as the default.
    expected_dates = [default_date, default_date, min_date, max_date]
    assert messages_back == [
        ExampleMessage(date_value=value) for value in expected_dates
    ]
Loading