cognitedata · haakonvt · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
@@ -145,7 +145,7 @@ def convert_nullable_int_cols(df: pd.DataFrame) -> pd.DataFrame:
 
 def convert_timestamp_columns_to_datetime(df: pd.DataFrame) -> pd.DataFrame:
     to_convert = df.columns.intersection(TIME_ATTRIBUTES)
-    df[to_convert] = (1_000_000 * df[to_convert]).astype("datetime64[ns]")
+    df[to_convert] = df[to_convert].astype("datetime64[ms]")
     return df
 
 
@@ -381,12 +381,16 @@ def _create_timestamp_index(
 
     match timestamps, timezone:
         case list(), None:
-            return pd.to_datetime(timestamps, unit="ms")
+            return pd.to_datetime(timestamps, unit="ms").as_unit("ms")
         case list(), _:
-            return pd.to_datetime(timestamps, unit="ms", utc=True).tz_convert(convert_tz_for_pandas(timezone))
+            return (
+                pd.to_datetime(timestamps, unit="ms", utc=True)
+                .tz_convert(convert_tz_for_pandas(timezone))
+                .as_unit("ms")
+            )
         case np.ndarray(), None:
-            return pd.to_datetime(timestamps)
+            return pd.to_datetime(timestamps).as_unit("ms")
         case np.ndarray(), _:
-            return pd.to_datetime(timestamps, utc=True).tz_convert(convert_tz_for_pandas(timezone))
+            return pd.to_datetime(timestamps, utc=True).as_unit("ms").tz_convert(convert_tz_for_pandas(timezone))
         case _:
             raise TypeError("Timestamps must be either list[int] or numpy.ndarray")
@@ -323,7 +323,7 @@ def test_iterate_data_subscription_datapoints_added(
                 {time_series_external_ids[0]: new_values},
                 index=pd.date_range(start=start, periods=2, freq="1d"),
             )
-            new_timestamps = new_data.index.asi8 // 10**6
+            new_timestamps = new_data.index.as_unit("ms").asi8
             try:
                 cognite_client.time_series.data.insert_dataframe(new_data)
                 batch = next(cognite_client.time_series.subscriptions.iterate_data(new_subscription.external_id))

@@ -1577,24 +1577,28 @@ def test_query_no_ts_exists(self, retrieve_endpoints: list[Callable]) -> None:
     def test_timezone_raw_query_dst_transitions(
         self, all_retrieve_endpoints: list[Callable], dps_queries_dst_transitions: list[DatapointsQuery]
     ) -> None:
-        expected_index = pd.to_datetime(
-            [
-                # to summer
-                "1991-03-31 00:20:05.911+01:00",
-                "1991-03-31 00:39:49.780+01:00",
-                "1991-03-31 03:21:08.144+02:00",
-                "1991-03-31 03:28:06.963+02:00",
-                "1991-03-31 03:28:51.903+02:00",
-                # to winter
-                "1991-09-29 01:02:37.949+02:00",
-                "1991-09-29 02:09:29.699+02:00",
-                "1991-09-29 02:11:39.983+02:00",
-                "1991-09-29 02:10:59.442+01:00",
-                "1991-09-29 02:52:26.212+01:00",
-                "1991-09-29 04:12:02.558+01:00",
-            ],
-            utc=True,  # pandas is not great at parameter names
-        ).tz_convert("Europe/Oslo")
+        expected_index = (
+            pd.to_datetime(
+                [
+                    # to summer
+                    "1991-03-31 00:20:05.911+01:00",
+                    "1991-03-31 00:39:49.780+01:00",
+                    "1991-03-31 03:21:08.144+02:00",
+                    "1991-03-31 03:28:06.963+02:00",
+                    "1991-03-31 03:28:51.903+02:00",
+                    # to winter
+                    "1991-09-29 01:02:37.949+02:00",
+                    "1991-09-29 02:09:29.699+02:00",
+                    "1991-09-29 02:11:39.983+02:00",
+                    "1991-09-29 02:10:59.442+01:00",
+                    "1991-09-29 02:52:26.212+01:00",
+                    "1991-09-29 04:12:02.558+01:00",
+                ],
+                utc=True,  # pandas is not great at parameter names
+            )
+            .tz_convert("Europe/Oslo")
+            .as_unit("ms")
+        )
         expected_to_summer_index = expected_index[:5]
         expected_to_winter_index = expected_index[5:]
         for endpoint, convert in zip(all_retrieve_endpoints, (True, True, False)):
@@ -2201,21 +2205,25 @@ def test_timezone_agg_query_dst_transitions(
     ) -> None:
         expected_values1 = [-0.0408386913634, -0.1204416510548, -0.1519269888052, 0.00331827604225]
         expected_values2 = [-0.0503489023269, 0.190474485259, 0.102249925079, -0.1000846729966]
-        expected_index = pd.to_datetime(
-            [
-                # to summer
-                "2023-03-26 00:00:00+01:00",
-                "2023-03-26 01:00:00+01:00",
-                "2023-03-26 03:00:00+02:00",
-                "2023-03-26 04:00:00+02:00",
-                # to winter
-                "2023-10-29 01:00:00+02:00",
-                "2023-10-29 02:00:00+02:00",
-                "2023-10-29 02:00:00+01:00",
-                "2023-10-29 03:00:00+01:00",
-            ],
-            utc=True,  # pandas is still not great at parameter names
-        ).tz_convert("Europe/Oslo")
+        expected_index = (
+            pd.to_datetime(
+                [
+                    # to summer
+                    "2023-03-26 00:00:00+01:00",
+                    "2023-03-26 01:00:00+01:00",
+                    "2023-03-26 03:00:00+02:00",
+                    "2023-03-26 04:00:00+02:00",
+                    # to winter
+                    "2023-10-29 01:00:00+02:00",
+                    "2023-10-29 02:00:00+02:00",
+                    "2023-10-29 02:00:00+01:00",
+                    "2023-10-29 03:00:00+01:00",
+                ],
+                utc=True,  # pandas is still not great at parameter names
+            )
+            .tz_convert("Europe/Oslo")
+            .as_unit("ms")
+        )
         expected_to_summer_index = expected_index[:4]
         expected_to_winter_index = expected_index[4:]
         for endpoint, convert in zip(all_retrieve_endpoints, (True, True, False)):
@@ -2449,7 +2457,8 @@ def test_raw_dps(
             assert res_df.isna().sum().sum() == 0
             assert res_df.shape == (exp_len, n_ts)
             assert res_df.dtypes.nunique() == 1
-            assert res_df.dtypes.iloc[0] == exp_dtype
+            dtype = res_df.dtypes.iloc[0]
+            assert dtype == exp_dtype or (exp_dtype is object and pd.api.types.is_string_dtype(dtype))
 
     @pytest.mark.parametrize("uniform, exp_n_ts_delta, exp_n_nans_step_interp", ((True, 1, 1), (False, 2, 0)))
     def test_agg_uniform_true_false(
@@ -2504,8 +2513,8 @@ def test_low_limits(
         )
         # We have duplicates in df.columns, so to test specific columns, we reset first:
         res_df.columns = c1, c2, c3, c4, *cx = range(len(res_df.columns))
-        assert res_df[[c1, c2]].dtypes.unique() == [object]
-        assert res_df[[c3, c4, *cx]].dtypes.unique() == [np.float64]
+        assert all(pd.api.types.is_string_dtype(dt) for dt in res_df[[c1, c2]].dtypes)
+        assert (res_df[[c3, c4, *cx]].dtypes == np.float64).all()
         assert (res_df[[c1, c3, *cx]].count() == [limit] * (len(cx) + 2)).all()
         assert (res_df[[c2, c4]].count() == [limit + 2] * 2).all()
 

@@ -589,7 +589,7 @@ def test_datapoints(self) -> None:
         expected_df = pd.DataFrame(
             # Since ID is not unique, we use stand-in column names initially, then replace:
             {"first-col": [2, 3, 4.0], "second-col": [3, 4, 5.0]},
-            index=pd.to_datetime(range(1, 4), unit="ms"),
+            index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"),
         )
         expected_df.columns = pd.MultiIndex.from_tuples(
             [(1, "average"), (1, "step_interpolation")],
@@ -601,11 +601,11 @@ def test_datapoints_no_names(self) -> None:
         import pandas as pd
 
         d = Datapoints(id=1, is_string=False, is_step=False, type="numeric", timestamp=[1, 2, 3], average=[2, 3, 4])
-        expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms"))
+        expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"))
         expected_df.columns = pd.MultiIndex.from_tuples([(1,)], names=["identifier"])
         pd.testing.assert_frame_equal(expected_df, d.to_pandas(include_aggregate_name=False))
 
-        expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms"))
+        expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"))
         expected_df.columns = pd.MultiIndex.from_tuples([(1, "average")], names=["identifier", "aggregate"])
         pd.testing.assert_frame_equal(expected_df, d.to_pandas(include_aggregate_name=True))
 
@@ -624,7 +624,7 @@ def test_id_and_external_id_set_gives_external_id_columns(self) -> None:
         )
         expected_df = pd.DataFrame(
             {"abc": [2, 3, 4.0], "also-abc": [3, 4, 5.0]},
-            index=pd.to_datetime(range(1, 4), unit="ms"),
+            index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"),
         )
         expected_df.columns = pd.MultiIndex.from_tuples(
             [("abc", "average"), ("abc", "step_interpolation")],
@@ -668,7 +668,7 @@ def test_datapoints_list(self) -> None:
                 "col4": [3, 4, 5, None],
                 "col5": [1, None, 3, 4.0],
             },
-            index=pd.to_datetime(range(1, 5), unit="ms"),
+            index=pd.to_datetime(range(1, 5), unit="ms").as_unit("ms"),
         )
         expected_df.columns = pd.MultiIndex.from_tuples(
             [(1, "average"), (1, "step_interpolation"), ("foo", "count"), ("foo", "step_interpolation"), (3, "")],
@@ -682,7 +682,9 @@ def test_datapoints_list_names(self) -> None:
         d1 = Datapoints(id=2, is_string=False, is_step=False, type="numeric", timestamp=[1, 2, 3], max=[2, 3, 4])
         d2 = Datapoints(id=3, is_string=False, is_step=False, type="numeric", timestamp=[1, 3], average=[1, 3])
         dps_list = DatapointsList([d1, d2])
-        expected_df = pd.DataFrame({1: [2, 3, 4.0], 2: [1, None, 3]}, index=pd.to_datetime(range(1, 4), unit="ms"))
+        expected_df = pd.DataFrame(
+            {1: [2, 3, 4.0], 2: [1, None, 3]}, index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms")
+        )
         expected_df.columns = pd.MultiIndex.from_tuples([(2, "max"), (3, "average")], names=["identifier", "aggregate"])
         pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
         expected_df.columns = pd.MultiIndex.from_tuples([(2,), (3,)], names=["identifier"])
@@ -696,7 +698,7 @@ def test_datapoints_list_names_dup(self) -> None:
         dps_list = DatapointsList([d1, d2])
         expected_df = pd.DataFrame(
             {1: [2, 3, 4.0], 2: [1, None, 3]},
-            index=pd.to_datetime(range(1, 4), unit="ms"),
+            index=pd.to_datetime(range(1, 4), unit="ms").as_unit("ms"),
         )
         expected_df.columns = pd.MultiIndex.from_tuples([(2, "max"), (2, "average")], names=["identifier", "aggregate"])
         pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
@@ -713,7 +715,7 @@ def test_datapoints_list_non_aligned(self) -> None:
 
         expected_df = pd.DataFrame(
             {1: [1, 2, 3, None, None], 2: [None, None, 3, 4, 5]},
-            index=pd.to_datetime(range(1, 6), unit="ms"),
+            index=pd.to_datetime(range(1, 6), unit="ms").as_unit("ms"),
         )
         expected_df.columns = pd.MultiIndex.from_tuples([(1,), (2,)], names=["identifier"])
         pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)

@@ -652,7 +652,8 @@ def test_dbs_to_pandas(self) -> None:
             pd.DataFrame({"name": ["kar", "car", "dar"]}), db_list.to_pandas().drop("created_time", axis=1)
         )
         pd.testing.assert_frame_equal(
-            pd.DataFrame({"value": ["kar"]}, index=["name"]), db_list[0].to_pandas().drop("created_time")
+            pd.DataFrame({"value": ["kar"]}, index=["name"], dtype=object),
+            db_list[0].to_pandas().drop("created_time"),
         )
 
     def test_tables_to_pandas(self) -> None:
@@ -666,7 +667,8 @@ def test_tables_to_pandas(self) -> None:
             pd.DataFrame({"name": ["kar", "car", "dar"]}), table_list.to_pandas().drop("created_time", axis=1)
         )
         pd.testing.assert_frame_equal(
-            pd.DataFrame({"value": ["kar"]}, index=["name"]), table_list[0].to_pandas().drop("created_time")
+            pd.DataFrame({"value": ["kar"]}, index=["name"], dtype=object),
+            table_list[0].to_pandas().drop("created_time"),
         )
 
     def test_rows_to_pandas(self) -> None:

@@ -740,8 +740,8 @@ def test_sequences_to_pandas(self, cognite_client: CogniteClient, mock_seq_respo
         df = seq.to_pandas(expand_metadata=True, metadata_prefix="")
         assert isinstance(df, pd.DataFrame)
         assert "metadata" not in df.columns
-        assert "string" == df.loc["description"][0]
-        assert "metadata-value" == df.loc["metadata-key"][0]
+        assert "string" == df.loc["description"].iloc[0]
+        assert "metadata-value" == df.loc["metadata-key"].iloc[0]
 
     def test_insert_dataframe_extids(self, cognite_client: CogniteClient, mock_post_sequence_data: HTTPXMock) -> None:
         import pandas as pd

@@ -259,5 +259,5 @@ def test_time_series_to_pandas(self, cognite_client: CogniteClient, mock_ts_resp
         df = ts.to_pandas(expand_metadata=True, metadata_prefix="", camel_case=True)
         assert isinstance(df, pd.DataFrame)
         assert "metadata" not in df.columns
-        assert [0] == df.loc["securityCategories"][0]
-        assert "metadata-value" == df.loc["metadata-key"][0]
+        assert [0] == df.loc["securityCategories"].iloc[0]
+        assert "metadata-value" == df.loc["metadata-key"].iloc[0]
@@ -501,7 +501,7 @@ def test_to_pandas(self) -> None:
         expected_df = pd.DataFrame(
             {
                 "varA": [1, 2],
-                "lastUpdatedTime": [pd.Timestamp(60, unit="ms"), pd.NaT],
+                "lastUpdatedTime": pd.array([pd.Timestamp(60, unit="ms"), pd.NaT], dtype="datetime64[ms]"),
                 "varB": [None, 3],
             },
         )

@@ -305,8 +305,8 @@ def test_to_pandas_list(self, person_read: PersonRead) -> None:
                     "space": ["sp_my_fixed_space"],
                     "external_id": ["my_external_id"],
                     "version": [1],
-                    "last_updated_time": [pd.Timestamp("1970-01-01 00:00:00")],
-                    "created_time": [pd.Timestamp("1970-01-01 00:00:00")],
+                    "last_updated_time": pd.array([pd.Timestamp(0, unit="ms")], dtype="datetime64[ms]"),
+                    "created_time": pd.array([pd.Timestamp(0, unit="ms")], dtype="datetime64[ms]"),
                     "instance_type": ["node"],
                     "type": [{"space": "sp_model_space", "external_id": "person"}],
                     "name": ["John Doe"],

@@ -111,7 +111,7 @@ def test_identifier_priority(self, dps_lst_cls: type[CogniteResourceList]) -> No
 
         exp_df = pd.DataFrame(
             {1: 2.0, 2: 4.0, 3: 6.0},
-            index=np.array([1234 * 1_000_000], dtype="datetime64[ns]"),
+            index=np.array([1234], dtype="datetime64[ms]"),
         )
         exp_df.columns = pd.MultiIndex.from_tuples(
             [(123,), ("foo",), (NodeId(space="s", external_id="x"),)], names=["identifier"]

@@ -104,7 +104,7 @@ class TestGroupsList:
     @pytest.mark.parametrize(
         "convert_timestamps, expected",
         (
-            (True, dict(data=[None, "1970-01-02 10:17:36.789", None], dtype="datetime64[ns]", name="deleted_time")),
+            (True, dict(data=[None, "1970-01-02 10:17:36.789", None], dtype="datetime64[ms]", name="deleted_time")),
             (False, dict(data=[-1, 123456789, None], dtype="Int64", name="deleted_time")),
         ),
     )