From 85fb82c8d20d2bd9c65ca7c0e855675640355061 Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 15:22:08 -0800 Subject: [PATCH 1/7] python(fix): Allow normal pagination for fetching data. --- .../sift_client/_internal/low_level_wrappers/data.py | 10 ++++------ python/lib/sift_client/resources/channels.py | 9 ++++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py index e5370bbe7..af469ba71 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/data.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py @@ -3,7 +3,6 @@ import asyncio import logging from datetime import datetime, timezone -from math import ceil from typing import TYPE_CHECKING, Any, cast import pandas as pd @@ -231,7 +230,8 @@ async def get_channel_data( run_id: str | None = None, start_time: datetime | None = None, end_time: datetime | None = None, - limit: int | None = None, + max_results: int | None = None, + page_size: int | None = None, ignore_cache: bool = False, ) -> dict[str, pd.DataFrame]: """Get the data for a channel during a run.""" @@ -247,8 +247,6 @@ async def get_channel_data( ) tasks = [] - page_size = limit if limit and limit < 1000 else 1000 - limit = ceil(limit / page_size) if limit else 10 # Queue up calls for non-cached channels in batches. batch_size = REQUEST_BATCH_SIZE for i in range(0, len(not_cached_channels), batch_size): # type: ignore @@ -264,7 +262,7 @@ async def get_channel_data( "end_time": end_time, }, page_size=page_size, - max_results=limit, + max_results=max_results, ) ) tasks.append(task) @@ -294,7 +292,7 @@ async def get_channel_data( "end_time": new_end_time or end_time, }, page_size=page_size, - max_results=limit, + max_results=max_results, ) ) tasks.append(task) diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py index d946d094f..aca37d38a 100644 --- a/python/lib/sift_client/resources/channels.py +++ b/python/lib/sift_client/resources/channels.py @@ -177,7 +177,8 @@ async def get_data( run: Run | str | None = None, start_time: datetime | None = None, end_time: datetime | None = None, - limit: int | None = None, + max_results: int | None = None, + page_size: int | None = None, ) -> dict[str, pd.DataFrame]: """Get data for one or more channels. @@ -186,7 +187,8 @@ async def get_data( run: The Run or run_id to get data for. start_time: The start time to get data for. end_time: The end time to get data for. - limit: The maximum number of data points to return. Will be in increments of page_size or default page size defined by the call if no page_size is provided. + max_results: The maximum number of data points to return. + page_size: The number of data points to return per page. Returns: A dictionary mapping channel names to pandas DataFrames containing the channel data. @@ -199,7 +201,8 @@ async def get_data( run_id=run_id, start_time=start_time, end_time=end_time, - limit=limit, + max_results=max_results, + page_size=page_size, ) async def get_data_as_arrow( From 89c911a7f87769073d2e39e0f51dc1367005b247 Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 16:49:38 -0800 Subject: [PATCH 2/7] Fix get_data_pyarrow --- python/lib/sift_client/resources/channels.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py index aca37d38a..8a21f8674 100644 --- a/python/lib/sift_client/resources/channels.py +++ b/python/lib/sift_client/resources/channels.py @@ -212,9 +212,10 @@ async def get_data_as_arrow( run: Run | str | None = None, start_time: datetime | None = None, end_time: datetime | None = None, - limit: int | None = None, + max_results: int | None = None, + page_size: int | None = None, ) -> dict[str, pa.Table]: - """Get data for one or more channels as pyarrow tables.""" + """Same as get_data but returns data as pyarrow tables.""" from pyarrow import Table as ArrowTable run_id = run.id_ if isinstance(run, Run) else run @@ -223,6 +224,7 @@ async def get_data_as_arrow( run=run_id, start_time=start_time, end_time=end_time, - limit=limit, + max_results=max_results, + page_size=page_size, ) return {k: ArrowTable.from_pandas(v) for k, v in data.items()} From 1a5792a80aeea3ebca4a80c8c83a0be002ac474b Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 17:14:29 -0800 Subject: [PATCH 3/7] Stubs --- python/lib/sift_client/resources/__init__.py | 2 +- .../lib/sift_client/resources/sync_stubs/__init__.pyi | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/lib/sift_client/resources/__init__.py b/python/lib/sift_client/resources/__init__.py index 7b2eacc29..3a76018b3 100644 --- a/python/lib/sift_client/resources/__init__.py +++ b/python/lib/sift_client/resources/__init__.py @@ -114,7 +114,7 @@ async def get_data(): run="run123", start_time=datetime.now() - timedelta(hours=1), end_time=datetime.now(), - limit=10000 + max_results=10000 ) # data is a dict mapping channel names to DataFrames diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index f85aa6132..ee3d3f3fb 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -431,7 +431,8 @@ class ChannelsAPI: run: Run | str | None = None, start_time: datetime | None = None, end_time: datetime | None = None, - limit: int | None = None, + max_results: int | None = None, + page_size: int | None = None, ) -> dict[str, pd.DataFrame]: """Get data for one or more channels. @@ -440,7 +441,8 @@ class ChannelsAPI: run: The Run or run_id to get data for. start_time: The start time to get data for. end_time: The end time to get data for. - limit: The maximum number of data points to return. Will be in increments of page_size or default page size defined by the call if no page_size is provided. + max_results: The maximum number of data points to return. + page_size: The number of data points to return per page. Returns: A dictionary mapping channel names to pandas DataFrames containing the channel data. @@ -454,9 +456,10 @@ class ChannelsAPI: run: Run | str | None = None, start_time: datetime | None = None, end_time: datetime | None = None, - limit: int | None = None, + max_results: int | None = None, + page_size: int | None = None, ) -> dict[str, pa.Table]: - """Get data for one or more channels as pyarrow tables.""" + """Same as get_data but returns data as pyarrow tables.""" ... def list_( From 0efa85d67020ccbf2d7a4ef5dcbd3fae52b66591 Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 17:18:17 -0800 Subject: [PATCH 4/7] Pass through results all the way --- python/lib/sift_client/sift_types/channel.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/sift_types/channel.py b/python/lib/sift_client/sift_types/channel.py index 69ba4b8ed..0828f8272 100644 --- a/python/lib/sift_client/sift_types/channel.py +++ b/python/lib/sift_client/sift_types/channel.py @@ -298,7 +298,8 @@ def data( run_id: str | None = None, start_time: datetime | None = None, end_time: datetime | None = None, - limit: int | None = None, + page_size: int | None = None, + max_results: int | None = None, as_arrow: bool = False, ): """Retrieve channel data for this channel during the specified run. @@ -319,7 +320,8 @@ def data( run=run_id, start_time=start_time, end_time=end_time, - limit=limit, # type: ignore + max_results=max_results, + page_size=page_size, ) else: data = self.client.channels.get_data( @@ -327,7 +329,8 @@ def data( run=run_id, start_time=start_time, end_time=end_time, - limit=limit, # type: ignore + max_results=max_results, + page_size=page_size, ) return data From 5a5c689b73373cc13cbc067f8a9be20085f60f5e Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 17:21:09 -0800 Subject: [PATCH 5/7] lint --- python/lib/sift_client/sift_types/channel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/sift_types/channel.py b/python/lib/sift_client/sift_types/channel.py index 0828f8272..520909493 100644 --- a/python/lib/sift_client/sift_types/channel.py +++ b/python/lib/sift_client/sift_types/channel.py @@ -308,7 +308,8 @@ def data( run_id: The run ID to get data for. start_time: The start time to get data for. end_time: The end time to get data for. - limit: The maximum number of data points to return. + page_size: The number of data points to return per page. + max_results: The maximum number of data points to return. as_arrow: Whether to return the data as an Arrow table. Returns: From 9d80371b1b314ab66d4431b830023deb0f5ae8b2 Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 17:24:57 -0800 Subject: [PATCH 6/7] lint --- python/lib/sift_client/_tests/conftest.py | 2 +- .../sift_client/_tests/sift_types/test_channel.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py index 81e24b287..1e0b176e4 100644 --- a/python/lib/sift_client/_tests/conftest.py +++ b/python/lib/sift_client/_tests/conftest.py @@ -25,7 +25,7 @@ def sift_client() -> SiftClient: api_key=api_key, grpc_url=grpc_url, rest_url=rest_url, - use_ssl=True, + use_ssl=False, ) ) diff --git a/python/lib/sift_client/_tests/sift_types/test_channel.py b/python/lib/sift_client/_tests/sift_types/test_channel.py index ab8fa01c2..e590de6e2 100644 --- a/python/lib/sift_client/_tests/sift_types/test_channel.py +++ b/python/lib/sift_client/_tests/sift_types/test_channel.py @@ -69,7 +69,8 @@ def test_data_method_calls_get_data(self, mock_channel, mock_client): run_id="run123", start_time=datetime(2024, 1, 1, tzinfo=timezone.utc), end_time=datetime(2024, 1, 2, tzinfo=timezone.utc), - limit=100, + max_results=100, + page_size=None, ) # Verify client method was called with correct parameters @@ -78,7 +79,8 @@ def test_data_method_calls_get_data(self, mock_channel, mock_client): run="run123", start_time=datetime(2024, 1, 1, tzinfo=timezone.utc), end_time=datetime(2024, 1, 2, tzinfo=timezone.utc), - limit=100, + max_results=100, + page_size=None, ) assert result == mock_data @@ -99,7 +101,8 @@ def test_data_method_as_arrow(self, mock_channel, mock_client): run="run123", start_time=None, end_time=None, - limit=None, + max_results=None, + page_size=None, ) mock_client.channels.get_data.assert_not_called() assert result == mock_data @@ -118,6 +121,7 @@ def test_data_method_with_minimal_params(self, mock_channel, mock_client): run=None, start_time=None, end_time=None, - limit=None, + max_results=None, + page_size=None, ) assert result == mock_data From f8b090ac3be379475c316a3bebe1888fbbf033fa Mon Sep 17 00:00:00 2001 From: Ian Later Date: Thu, 20 Nov 2025 17:25:08 -0800 Subject: [PATCH 7/7] tests --- python/lib/sift_client/_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py index 1e0b176e4..81e24b287 100644 --- a/python/lib/sift_client/_tests/conftest.py +++ b/python/lib/sift_client/_tests/conftest.py @@ -25,7 +25,7 @@ def sift_client() -> SiftClient: api_key=api_key, grpc_url=grpc_url, rest_url=rest_url, - use_ssl=False, + use_ssl=True, ) )