Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions cognite/client/utils/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def convert_nullable_int_cols(df: pd.DataFrame) -> pd.DataFrame:

def convert_timestamp_columns_to_datetime(df: pd.DataFrame) -> pd.DataFrame:
to_convert = df.columns.intersection(TIME_ATTRIBUTES)
df[to_convert] = (1_000_000 * df[to_convert]).astype("datetime64[ns]")
df[to_convert] = df[to_convert].astype("datetime64[ms]")
return df


Expand Down Expand Up @@ -381,12 +381,16 @@ def _create_timestamp_index(

match timestamps, timezone:
case list(), None:
return pd.to_datetime(timestamps, unit="ms")
return pd.to_datetime(timestamps, unit="ms").as_unit("ms")
case list(), _:
return pd.to_datetime(timestamps, unit="ms", utc=True).tz_convert(convert_tz_for_pandas(timezone))
return (
pd.to_datetime(timestamps, unit="ms", utc=True)
.tz_convert(convert_tz_for_pandas(timezone))
.as_unit("ms")
)
case np.ndarray(), None:
return pd.to_datetime(timestamps)
return pd.to_datetime(timestamps).as_unit("ms")
case np.ndarray(), _:
return pd.to_datetime(timestamps, utc=True).tz_convert(convert_tz_for_pandas(timezone))
return pd.to_datetime(timestamps, utc=True).as_unit("ms").tz_convert(convert_tz_for_pandas(timezone))
case _:
raise TypeError("Timestamps must be either list[int] or numpy.ndarray")
498 changes: 302 additions & 196 deletions poetry.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def test_iterate_data_subscription_datapoints_added(
{time_series_external_ids[0]: new_values},
index=pd.date_range(start=start, periods=2, freq="1d"),
)
new_timestamps = new_data.index.asi8 // 10**6
new_timestamps = new_data.index.as_unit("ms").asi8
try:
cognite_client.time_series.data.insert_dataframe(new_data)
batch = next(cognite_client.time_series.subscriptions.iterate_data(new_subscription.external_id))
Expand Down
81 changes: 45 additions & 36 deletions tests/tests_integration/test_api/test_datapoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1577,24 +1577,28 @@ def test_query_no_ts_exists(self, retrieve_endpoints: list[Callable]) -> None:
def test_timezone_raw_query_dst_transitions(
self, all_retrieve_endpoints: list[Callable], dps_queries_dst_transitions: list[DatapointsQuery]
) -> None:
expected_index = pd.to_datetime(
[
# to summer
"1991-03-31 00:20:05.911+01:00",
"1991-03-31 00:39:49.780+01:00",
"1991-03-31 03:21:08.144+02:00",
"1991-03-31 03:28:06.963+02:00",
"1991-03-31 03:28:51.903+02:00",
# to winter
"1991-09-29 01:02:37.949+02:00",
"1991-09-29 02:09:29.699+02:00",
"1991-09-29 02:11:39.983+02:00",
"1991-09-29 02:10:59.442+01:00",
"1991-09-29 02:52:26.212+01:00",
"1991-09-29 04:12:02.558+01:00",
],
utc=True, # pandas is not great at parameter names
).tz_convert("Europe/Oslo")
expected_index = (
pd.to_datetime(
[
# to summer
"1991-03-31 00:20:05.911+01:00",
"1991-03-31 00:39:49.780+01:00",
"1991-03-31 03:21:08.144+02:00",
"1991-03-31 03:28:06.963+02:00",
"1991-03-31 03:28:51.903+02:00",
# to winter
"1991-09-29 01:02:37.949+02:00",
"1991-09-29 02:09:29.699+02:00",
"1991-09-29 02:11:39.983+02:00",
"1991-09-29 02:10:59.442+01:00",
"1991-09-29 02:52:26.212+01:00",
"1991-09-29 04:12:02.558+01:00",
],
utc=True, # pandas is not great at parameter names
)
.tz_convert("Europe/Oslo")
.as_unit("ms")
)
expected_to_summer_index = expected_index[:5]
expected_to_winter_index = expected_index[5:]
for endpoint, convert in zip(all_retrieve_endpoints, (True, True, False)):
Expand Down Expand Up @@ -2201,21 +2205,25 @@ def test_timezone_agg_query_dst_transitions(
) -> None:
expected_values1 = [-0.0408386913634, -0.1204416510548, -0.1519269888052, 0.00331827604225]
expected_values2 = [-0.0503489023269, 0.190474485259, 0.102249925079, -0.1000846729966]
expected_index = pd.to_datetime(
[
# to summer
"2023-03-26 00:00:00+01:00",
"2023-03-26 01:00:00+01:00",
"2023-03-26 03:00:00+02:00",
"2023-03-26 04:00:00+02:00",
# to winter
"2023-10-29 01:00:00+02:00",
"2023-10-29 02:00:00+02:00",
"2023-10-29 02:00:00+01:00",
"2023-10-29 03:00:00+01:00",
],
utc=True, # pandas is still not great at parameter names
).tz_convert("Europe/Oslo")
expected_index = (
pd.to_datetime(
[
# to summer
"2023-03-26 00:00:00+01:00",
"2023-03-26 01:00:00+01:00",
"2023-03-26 03:00:00+02:00",
"2023-03-26 04:00:00+02:00",
# to winter
"2023-10-29 01:00:00+02:00",
"2023-10-29 02:00:00+02:00",
"2023-10-29 02:00:00+01:00",
"2023-10-29 03:00:00+01:00",
],
utc=True, # pandas is still not great at parameter names
)
.tz_convert("Europe/Oslo")
.as_unit("ms")
)
expected_to_summer_index = expected_index[:4]
expected_to_winter_index = expected_index[4:]
for endpoint, convert in zip(all_retrieve_endpoints, (True, True, False)):
Expand Down Expand Up @@ -2449,7 +2457,8 @@ def test_raw_dps(
assert res_df.isna().sum().sum() == 0
assert res_df.shape == (exp_len, n_ts)
assert res_df.dtypes.nunique() == 1
assert res_df.dtypes.iloc[0] == exp_dtype
dtype = res_df.dtypes.iloc[0]
assert dtype == exp_dtype or (exp_dtype is object and pd.api.types.is_string_dtype(dtype))

@pytest.mark.parametrize("uniform, exp_n_ts_delta, exp_n_nans_step_interp", ((True, 1, 1), (False, 2, 0)))
def test_agg_uniform_true_false(
Expand Down Expand Up @@ -2504,8 +2513,8 @@ def test_low_limits(
)
# We have duplicates in df.columns, so to test specific columns, we reset first:
res_df.columns = c1, c2, c3, c4, *cx = range(len(res_df.columns))
assert res_df[[c1, c2]].dtypes.unique() == [object]
assert res_df[[c3, c4, *cx]].dtypes.unique() == [np.float64]
assert all(pd.api.types.is_string_dtype(dt) for dt in res_df[[c1, c2]].dtypes)
assert (res_df[[c3, c4, *cx]].dtypes == np.float64).all()
assert (res_df[[c1, c3, *cx]].count() == [limit] * (len(cx) + 2)).all()
assert (res_df[[c2, c4]].count() == [limit + 2] * 2).all()

Expand Down
18 changes: 10 additions & 8 deletions tests/tests_unit/test_api/test_datapoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def test_datapoints(self) -> None:
expected_df = pd.DataFrame(
# Since ID is not unique, we use stand-in column names initially, then replace:
{"first-col": [2, 3, 4.0], "second-col": [3, 4, 5.0]},
index=pd.to_datetime(range(1, 4), unit="ms"),
index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"),
)
expected_df.columns = pd.MultiIndex.from_tuples(
[(1, "average"), (1, "step_interpolation")],
Expand All @@ -601,11 +601,11 @@ def test_datapoints_no_names(self) -> None:
import pandas as pd

d = Datapoints(id=1, is_string=False, is_step=False, type="numeric", timestamp=[1, 2, 3], average=[2, 3, 4])
expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms"))
expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"))
expected_df.columns = pd.MultiIndex.from_tuples([(1,)], names=["identifier"])
pd.testing.assert_frame_equal(expected_df, d.to_pandas(include_aggregate_name=False))

expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms"))
expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"))
expected_df.columns = pd.MultiIndex.from_tuples([(1, "average")], names=["identifier", "aggregate"])
pd.testing.assert_frame_equal(expected_df, d.to_pandas(include_aggregate_name=True))

Expand All @@ -624,7 +624,7 @@ def test_id_and_external_id_set_gives_external_id_columns(self) -> None:
)
expected_df = pd.DataFrame(
{"abc": [2, 3, 4.0], "also-abc": [3, 4, 5.0]},
index=pd.to_datetime(range(1, 4), unit="ms"),
index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"),
)
expected_df.columns = pd.MultiIndex.from_tuples(
[("abc", "average"), ("abc", "step_interpolation")],
Expand Down Expand Up @@ -668,7 +668,7 @@ def test_datapoints_list(self) -> None:
"col4": [3, 4, 5, None],
"col5": [1, None, 3, 4.0],
},
index=pd.to_datetime(range(1, 5), unit="ms"),
index=pd.to_datetime(range(1, 5), unit="ms").as_unit("ms"),
)
expected_df.columns = pd.MultiIndex.from_tuples(
[(1, "average"), (1, "step_interpolation"), ("foo", "count"), ("foo", "step_interpolation"), (3, "")],
Expand All @@ -682,7 +682,9 @@ def test_datapoints_list_names(self) -> None:
d1 = Datapoints(id=2, is_string=False, is_step=False, type="numeric", timestamp=[1, 2, 3], max=[2, 3, 4])
d2 = Datapoints(id=3, is_string=False, is_step=False, type="numeric", timestamp=[1, 3], average=[1, 3])
dps_list = DatapointsList([d1, d2])
expected_df = pd.DataFrame({1: [2, 3, 4.0], 2: [1, None, 3]}, index=pd.to_datetime(range(1, 4), unit="ms"))
expected_df = pd.DataFrame(
{1: [2, 3, 4.0], 2: [1, None, 3]}, index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms")
)
expected_df.columns = pd.MultiIndex.from_tuples([(2, "max"), (3, "average")], names=["identifier", "aggregate"])
pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
expected_df.columns = pd.MultiIndex.from_tuples([(2,), (3,)], names=["identifier"])
Expand All @@ -696,7 +698,7 @@ def test_datapoints_list_names_dup(self) -> None:
dps_list = DatapointsList([d1, d2])
expected_df = pd.DataFrame(
{1: [2, 3, 4.0], 2: [1, None, 3]},
index=pd.to_datetime(range(1, 4), unit="ms"),
index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"),
)
expected_df.columns = pd.MultiIndex.from_tuples([(2, "max"), (2, "average")], names=["identifier", "aggregate"])
pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
Expand All @@ -713,7 +715,7 @@ def test_datapoints_list_non_aligned(self) -> None:

expected_df = pd.DataFrame(
{1: [1, 2, 3, None, None], 2: [None, None, 3, 4, 5]},
index=pd.to_datetime(range(1, 6), unit="ms"),
index=pd.to_datetime(range(1, 6), unit="ms").as_unit("ms"),
)
expected_df.columns = pd.MultiIndex.from_tuples([(1,), (2,)], names=["identifier"])
pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
Expand Down
6 changes: 4 additions & 2 deletions tests/tests_unit/test_api/test_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,8 @@ def test_dbs_to_pandas(self) -> None:
pd.DataFrame({"name": ["kar", "car", "dar"]}), db_list.to_pandas().drop("created_time", axis=1)
)
pd.testing.assert_frame_equal(
pd.DataFrame({"value": ["kar"]}, index=["name"]), db_list[0].to_pandas().drop("created_time")
pd.DataFrame({"value": ["kar"]}, index=["name"], dtype=object),
db_list[0].to_pandas().drop("created_time"),
)

def test_tables_to_pandas(self) -> None:
Expand All @@ -666,7 +667,8 @@ def test_tables_to_pandas(self) -> None:
pd.DataFrame({"name": ["kar", "car", "dar"]}), table_list.to_pandas().drop("created_time", axis=1)
)
pd.testing.assert_frame_equal(
pd.DataFrame({"value": ["kar"]}, index=["name"]), table_list[0].to_pandas().drop("created_time")
pd.DataFrame({"value": ["kar"]}, index=["name"], dtype=object),
table_list[0].to_pandas().drop("created_time"),
)

def test_rows_to_pandas(self) -> None:
Expand Down
4 changes: 2 additions & 2 deletions tests/tests_unit/test_api/test_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,8 +740,8 @@ def test_sequences_to_pandas(self, cognite_client: CogniteClient, mock_seq_respo
df = seq.to_pandas(expand_metadata=True, metadata_prefix="")
assert isinstance(df, pd.DataFrame)
assert "metadata" not in df.columns
assert "string" == df.loc["description"][0]
assert "metadata-value" == df.loc["metadata-key"][0]
assert "string" == df.loc["description"].iloc[0]
assert "metadata-value" == df.loc["metadata-key"].iloc[0]

def test_insert_dataframe_extids(self, cognite_client: CogniteClient, mock_post_sequence_data: HTTPXMock) -> None:
import pandas as pd
Expand Down
4 changes: 2 additions & 2 deletions tests/tests_unit/test_api/test_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,5 +259,5 @@ def test_time_series_to_pandas(self, cognite_client: CogniteClient, mock_ts_resp
df = ts.to_pandas(expand_metadata=True, metadata_prefix="", camel_case=True)
assert isinstance(df, pd.DataFrame)
assert "metadata" not in df.columns
assert [0] == df.loc["securityCategories"][0]
assert "metadata-value" == df.loc["metadata-key"][0]
assert [0] == df.loc["securityCategories"].iloc[0]
assert "metadata-value" == df.loc["metadata-key"].iloc[0]
2 changes: 1 addition & 1 deletion tests/tests_unit/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ def test_to_pandas(self) -> None:
expected_df = pd.DataFrame(
{
"varA": [1, 2],
"lastUpdatedTime": [pd.Timestamp(60, unit="ms"), pd.NaT],
"lastUpdatedTime": pd.array([pd.Timestamp(60, unit="ms"), pd.NaT], dtype="datetime64[ms]"),
"varB": [None, 3],
},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,8 @@ def test_to_pandas_list(self, person_read: PersonRead) -> None:
"space": ["sp_my_fixed_space"],
"external_id": ["my_external_id"],
"version": [1],
"last_updated_time": [pd.Timestamp("1970-01-01 00:00:00")],
"created_time": [pd.Timestamp("1970-01-01 00:00:00")],
"last_updated_time": pd.array([pd.Timestamp(0, unit="ms")], dtype="datetime64[ms]"),
"created_time": pd.array([pd.Timestamp(0, unit="ms")], dtype="datetime64[ms]"),
"instance_type": ["node"],
"type": [{"space": "sp_model_space", "external_id": "person"}],
"name": ["John Doe"],
Expand Down
2 changes: 1 addition & 1 deletion tests/tests_unit/test_data_classes/test_datapoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_identifier_priority(self, dps_lst_cls: type[CogniteResourceList]) -> No

exp_df = pd.DataFrame(
{1: 2.0, 2: 4.0, 3: 6.0},
index=np.array([1234 * 1_000_000], dtype="datetime64[ns]"),
index=np.array([1234], dtype="datetime64[ms]"),
)
exp_df.columns = pd.MultiIndex.from_tuples(
[(123,), ("foo",), (NodeId(space="s", external_id="x"),)], names=["identifier"]
Expand Down
2 changes: 1 addition & 1 deletion tests/tests_unit/test_data_classes/test_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class TestGroupsList:
@pytest.mark.parametrize(
"convert_timestamps, expected",
(
(True, dict(data=[None, "1970-01-02 10:17:36.789", None], dtype="datetime64[ns]", name="deleted_time")),
(True, dict(data=[None, "1970-01-02 10:17:36.789", None], dtype="datetime64[ms]", name="deleted_time")),
(False, dict(data=[-1, 123456789, None], dtype="Int64", name="deleted_time")),
),
)
Expand Down
Loading