From d3bf48c7fd5419034ebedd8c034b7490fd51775a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 14 May 2026 22:35:09 +0000 Subject: [PATCH 1/6] feat: add callback parameter to _read_gbq_colab --- .../ibis_compiler/scalar_op_registry.py | 2 +- packages/bigframes/bigframes/pandas/io/api.py | 6 ++++ .../bigframes/bigframes/session/__init__.py | 35 +++++++++++++------ .../tests/unit/session/test_read_gbq_colab.py | 10 ++++++ 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 926f220370b0..e9e8435ea1a1 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -16,7 +16,7 @@ import functools import typing -from typing import cast, Any +from typing import Any, cast import bigframes_vendored.ibis.expr.api as ibis_api import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index 6c83095ab3cd..f8c3a881eed6 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -300,6 +300,7 @@ def _try_read_gbq_colab_sessionless_dry_run( def _read_gbq_colab( # type: ignore[overload-overlap] query_or_table: str, *, + callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[False] = ..., ) -> bigframes.dataframe.DataFrame: ... @@ -309,6 +310,7 @@ def _read_gbq_colab( # type: ignore[overload-overlap] def _read_gbq_colab( query_or_table: str, *, + callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[True] = ..., ) -> pandas.Series: ... @@ -317,6 +319,7 @@ def _read_gbq_colab( def _read_gbq_colab( query_or_table: str, *, + callback: Optional[Callable[[bigframes.core.events.Event], None]] = None, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: bool = False, ) -> bigframes.dataframe.DataFrame | pandas.Series: @@ -328,6 +331,8 @@ def _read_gbq_colab( Args: query_or_table (str): SQL query or table ID (table ID not yet supported). + callback (Optional[Callable[[bigframes.core.events.Event], None]]): + Callback to receive query execution events. pyformat_args (Optional[Dict[str, Any]]): Parameters to format into the query string. dry_run (bool): @@ -379,6 +384,7 @@ def _read_gbq_colab( return global_session.with_default_session( bigframes.session.Session._read_gbq_colab, query_or_table, + callback=callback, pyformat_args=pyformat_args, dry_run=dry_run, ) diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 38e92a60321b..775cb4e33e4f 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -199,9 +199,10 @@ def __init__( self._clients_provider = clients_provider self._location = context.location or "US" else: - credentials, project = ( - bigframes._config.auth.resolve_credentials_and_project(context) - ) + ( + credentials, + project, + ) = bigframes._config.auth.resolve_credentials_and_project(context) if context.location is None: with bigquery.Client( project=project, @@ -584,6 +585,7 @@ def _read_gbq_colab( self, query: str, *, + callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., ) -> dataframe.DataFrame: ... @@ -593,6 +595,7 @@ def _read_gbq_colab( self, query: str, *, + callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., ) -> pandas.Series: ... @@ -601,8 +604,8 @@ def _read_gbq_colab( def _read_gbq_colab( self, query: str, - # TODO: Add a callback parameter that takes some kind of Event object. *, + callback: Optional[Callable[[bigframes.core.events.Event], None]] = None, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: bool = False, ) -> Union[dataframe.DataFrame, pandas.Series]: @@ -615,6 +618,8 @@ def _read_gbq_colab( query (str): A SQL query string to execute. Results (if any) are turned into a DataFrame. + callback (Optional[Callable[[bigframes.core.events.Event], None]]): + Callback to receive query execution events. pyformat_args (dict): A dictionary of potential variables to replace in ``query``. Note: strings are _not_ escaped. Use query parameters for these, @@ -634,13 +639,21 @@ def _read_gbq_colab( dry_run=dry_run, ) - return self._loader.read_gbq_query( - query=query, - index_col=bigframes.enums.DefaultIndexKind.NULL, - force_total_order=False, - dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run), - allow_large_results=allow_large_results, - ) + def _run_query(): + return self._loader.read_gbq_query( + query=query, + index_col=bigframes.enums.DefaultIndexKind.NULL, + force_total_order=False, + dry_run=typing.cast( + Union[Literal[False], Literal[True]], dry_run + ), + allow_large_results=allow_large_results, + ) + + if callback is not None: + with self._publisher.subscribe(callback): + return _run_query() + return _run_query() @overload def read_gbq_query( # type: ignore[overload-overlap] diff --git a/packages/bigframes/tests/unit/session/test_read_gbq_colab.py b/packages/bigframes/tests/unit/session/test_read_gbq_colab.py index bb2cba0c1093..39d733dcc907 100644 --- a/packages/bigframes/tests/unit/session/test_read_gbq_colab.py +++ b/packages/bigframes/tests/unit/session/test_read_gbq_colab.py @@ -126,3 +126,13 @@ def test_read_gbq_colab_doesnt_set_destination_table(): assert query == "SELECT 'my-test-query';" assert config.destination is None + + +def test_read_gbq_colab_with_callback(): + """Make sure callback receives events during execution.""" + session = mocks.create_bigquery_session() + callback = mock.Mock() + + _ = session._read_gbq_colab("SELECT 'my-test-query';", callback=callback) + + assert callback.call_count > 0 From 7574f221c49fd6cba5c288a4bc5d8f47d3067e43 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 15 May 2026 19:59:55 +0000 Subject: [PATCH 2/6] feat: support execution history filtering by events and job_ids --- .../bigframes/core/global_session.py | 14 ++- .../bigframes/bigframes/session/__init__.py | 104 +++++++++++++++--- .../tests/unit/session/test_read_gbq_colab.py | 45 ++++++++ 3 files changed, 144 insertions(+), 19 deletions(-) diff --git a/packages/bigframes/bigframes/core/global_session.py b/packages/bigframes/bigframes/core/global_session.py index 6ffb37ac5acf..e18e1de7f156 100644 --- a/packages/bigframes/bigframes/core/global_session.py +++ b/packages/bigframes/bigframes/core/global_session.py @@ -19,7 +19,7 @@ import threading import traceback import warnings -from typing import TYPE_CHECKING, Callable, Optional, TypeVar +from typing import TYPE_CHECKING, Callable, Iterable, Optional, TypeVar import google.auth.exceptions @@ -124,12 +124,20 @@ def with_default_session(func_: Callable[..., _T], *args, **kwargs) -> _T: return func_(get_global_session(), *args, **kwargs) -def execution_history() -> "bigframes.session._ExecutionHistory": +def execution_history( + *, + events: Optional[Iterable[bigframes.core.events.Event]] = None, + job_ids: Optional[Iterable[str]] = None, +) -> "bigframes.session._ExecutionHistory": import pandas # noqa: F401 import bigframes.session - return with_default_session(bigframes.session.Session.execution_history) + return with_default_session( + bigframes.session.Session.execution_history, + events=events, + job_ids=job_ids, + ) class _GlobalSessionContext: diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 775cb4e33e4f..2252e3dc9e56 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -113,6 +113,18 @@ class _ExecutionHistory: def __init__(self, jobs: list[dict]): self._df = pandas.DataFrame(jobs) + if self._df.empty: + self._df = pandas.DataFrame( + columns=[ + "job_id", + "query_id", + "job_type", + "status", + "query", + "total_bytes_processed", + "job_url", + ] + ) def to_dataframe(self) -> pandas.DataFrame: """Returns the execution history as a pandas DataFrame.""" @@ -431,12 +443,63 @@ def slot_millis_sum(self): """The sum of all slot time used by bigquery jobs in this session.""" return self._metrics.slot_millis - def execution_history(self) -> _ExecutionHistory: + def execution_history( + self, + *, + events: Optional[Iterable[bigframes.core.events.Event]] = None, + job_ids: Optional[Iterable[str]] = None, + ) -> _ExecutionHistory: """Returns the history of executions initiated by BigFrames in the current session. Use `.to_dataframe()` on the result to get a pandas DataFrame. + + Args: + events (Iterable[Event], optional): + Filter execution history to only include jobs associated with the given events. + job_ids (Iterable[str], optional): + Filter execution history to only include jobs matching the given job IDs. """ - return _ExecutionHistory([job.__dict__ for job in self._metrics.jobs]) + jobs = [job.__dict__ for job in self._metrics.jobs] + + if events is not None: + event_job_ids = { + getattr(event, "job_id", None) + for event in events + if getattr(event, "job_id", None) is not None + } + event_query_ids = { + getattr(event, "query_id", None) + for event in events + if getattr(event, "query_id", None) is not None + } + jobs = [ + job + for job in jobs + if ( + job.get("job_id") is not None and job.get("job_id") in event_job_ids + ) + or ( + job.get("query_id") is not None + and job.get("query_id") in event_query_ids + ) + ] + + if job_ids is not None: + target_job_ids = set(job_ids) + jobs = [ + job + for job in jobs + if ( + job.get("job_id") is not None + and job.get("job_id") in target_job_ids + ) + or ( + job.get("query_id") is not None + and job.get("query_id") in target_job_ids + ) + ] + + return _ExecutionHistory(jobs) @property def _allows_ambiguity(self) -> bool: @@ -499,7 +562,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq( @@ -515,7 +579,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq( self, @@ -588,7 +653,8 @@ def _read_gbq_colab( callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def _read_gbq_colab( @@ -598,7 +664,8 @@ def _read_gbq_colab( callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -644,9 +711,7 @@ def _run_query(): query=query, index_col=bigframes.enums.DefaultIndexKind.NULL, force_total_order=False, - dry_run=typing.cast( - Union[Literal[False], Literal[True]], dry_run - ), + dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run), allow_large_results=allow_large_results, ) @@ -669,7 +734,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_query( @@ -685,7 +751,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_query( self, @@ -832,7 +899,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -846,7 +914,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -997,7 +1066,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: ... + ) -> bigframes.core.indexes.Index: + ... @typing.overload def read_pandas( @@ -1005,7 +1075,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: ... + ) -> bigframes.series.Series: + ... @typing.overload def read_pandas( @@ -1013,7 +1084,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... def read_pandas( self, diff --git a/packages/bigframes/tests/unit/session/test_read_gbq_colab.py b/packages/bigframes/tests/unit/session/test_read_gbq_colab.py index 39d733dcc907..49f4a136fbd3 100644 --- a/packages/bigframes/tests/unit/session/test_read_gbq_colab.py +++ b/packages/bigframes/tests/unit/session/test_read_gbq_colab.py @@ -136,3 +136,48 @@ def test_read_gbq_colab_with_callback(): _ = session._read_gbq_colab("SELECT 'my-test-query';", callback=callback) assert callback.call_count > 0 + + +def test_read_gbq_colab_filters_by_cell(): + """Verify that callbacks are scoped to individual executions.""" + session = mocks.create_bigquery_session() + callback1 = mock.Mock() + callback2 = mock.Mock() + + _ = session._read_gbq_colab("SELECT 'cell_1_query';", callback=callback1) + callback1_initial_count = callback1.call_count + + _ = session._read_gbq_colab("SELECT 'cell_2_query';", callback=callback2) + + # Verify callback1 was automatically unsubscribed upon completion + # of the first query. + assert callback1.call_count == callback1_initial_count + assert callback2.call_count > 0 + + +def test_execution_history_filtering(): + """Verify that execution_history can be filtered by job_ids or events.""" + from bigframes.session import metrics + + session = mocks.create_bigquery_session() + + # Add mock jobs to session metrics + job1 = metrics.JobMetadata( + job_id="job_1", job_type="query", query="SELECT 1" + ) + job2 = metrics.JobMetadata( + job_id="job_2", job_type="query", query="SELECT 2" + ) + session._metrics.jobs.extend([job1, job2]) + + # Verify filtering by job_ids isolates the target execution + history_job1 = session.execution_history(job_ids=["job_1"]).to_dataframe() + assert len(history_job1) == 1 + assert history_job1.iloc[0]["job_id"] == "job_1" + + # Verify filtering by events isolates the target execution + event2 = mock.Mock() + event2.job_id = "job_2" + history_job2 = session.execution_history(events=[event2]).to_dataframe() + assert len(history_job2) == 1 + assert history_job2.iloc[0]["job_id"] == "job_2" From d2eea39fabd8758729800da965f8e77fa9ddd553 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 15 May 2026 21:03:44 +0000 Subject: [PATCH 3/6] feat: support automatic per-cell execution history filtering --- .../bigframes/core/global_session.py | 3 ++ .../bigframes/bigframes/session/__init__.py | 22 +++++++++++- .../bigframes/bigframes/session/metrics.py | 34 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/core/global_session.py b/packages/bigframes/bigframes/core/global_session.py index e18e1de7f156..c1164cca43be 100644 --- a/packages/bigframes/bigframes/core/global_session.py +++ b/packages/bigframes/bigframes/core/global_session.py @@ -128,6 +128,7 @@ def execution_history( *, events: Optional[Iterable[bigframes.core.events.Event]] = None, job_ids: Optional[Iterable[str]] = None, + filter_by_cell: bool = True, ) -> "bigframes.session._ExecutionHistory": import pandas # noqa: F401 @@ -137,9 +138,11 @@ def execution_history( bigframes.session.Session.execution_history, events=events, job_ids=job_ids, + filter_by_cell=filter_by_cell, ) + class _GlobalSessionContext: """ Context manager for testing that sets global session. diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 2252e3dc9e56..5b6e5c6d0997 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -448,6 +448,7 @@ def execution_history( *, events: Optional[Iterable[bigframes.core.events.Event]] = None, job_ids: Optional[Iterable[str]] = None, + filter_by_cell: bool = True, ) -> _ExecutionHistory: """Returns the history of executions initiated by BigFrames in the current session. @@ -458,6 +459,9 @@ def execution_history( Filter execution history to only include jobs associated with the given events. job_ids (Iterable[str], optional): Filter execution history to only include jobs matching the given job IDs. + filter_by_cell (bool, optional): + If True and running in Colab/Jupyter, automatically filter history to only include + jobs executed within the current cell. Defaults to True. """ jobs = [job.__dict__ for job in self._metrics.jobs] @@ -484,7 +488,7 @@ def execution_history( ) ] - if job_ids is not None: + elif job_ids is not None: target_job_ids = set(job_ids) jobs = [ job @@ -499,8 +503,24 @@ def execution_history( ) ] + elif filter_by_cell: + try: + import IPython + + ipy = IPython.get_ipython() + if ipy is not None and hasattr(ipy, "execution_count"): + current_count = ipy.execution_count + jobs = [ + job + for job in jobs + if job.get("cell_execution_count") == current_count + ] + except (ImportError, NameError): + pass + return _ExecutionHistory(jobs) + @property def _allows_ambiguity(self) -> bool: return self._allow_ambiguity diff --git a/packages/bigframes/bigframes/session/metrics.py b/packages/bigframes/bigframes/session/metrics.py index d2682bbcaf7f..51590968f06c 100644 --- a/packages/bigframes/bigframes/session/metrics.py +++ b/packages/bigframes/bigframes/session/metrics.py @@ -51,6 +51,7 @@ class JobMetadata: input_bytes: Optional[int] = None output_rows: Optional[int] = None source_format: Optional[str] = None + cell_execution_count: Optional[int] = None @classmethod def from_job( @@ -65,6 +66,16 @@ def from_job( if job_id: job_url = f"https://console.cloud.google.com/bigquery?project={query_job.project}&j=bq:{query_job.location}:{job_id}&page=queryresults" + cell_execution_count = None + try: + import IPython + + ipy = IPython.get_ipython() + if ipy is not None and hasattr(ipy, "execution_count"): + cell_execution_count = ipy.execution_count + except (ImportError, NameError): + pass + metadata = cls( job_id=query_job.job_id, location=query_job.location, @@ -78,6 +89,7 @@ def from_job( error_result=query_job.error_result, query=query_text, job_url=job_url, + cell_execution_count=cell_execution_count, ) if isinstance(query_job, QueryJob): metadata.cached = getattr(query_job, "cache_hit", None) @@ -121,6 +133,16 @@ def from_row_iterator( location = getattr(row_iterator, "location", "") job_url = f"https://console.cloud.google.com/bigquery?project={project}&j=bq:{location}:{job_id}&page=queryresults" + cell_execution_count = None + try: + import IPython + + ipy = IPython.get_ipython() + if ipy is not None and hasattr(ipy, "execution_count"): + cell_execution_count = ipy.execution_count + except (ImportError, NameError): + pass + return cls( job_id=job_id, query_id=getattr(row_iterator, "query_id", None), @@ -137,6 +159,7 @@ def from_row_iterator( cached=getattr(row_iterator, "cache_hit", None), query=query_text, job_url=job_url, + cell_execution_count=cell_execution_count, ) @@ -249,10 +272,21 @@ def on_event(self, event: Any): bytes_processed = event.result.total_bytes_processed or 0 self.bytes_processed += bytes_processed + cell_execution_count = None + try: + import IPython + + ipy = IPython.get_ipython() + if ipy is not None and hasattr(ipy, "execution_count"): + cell_execution_count = ipy.execution_count + except (ImportError, NameError): + pass + metadata = JobMetadata( job_type="polars", status="DONE", total_bytes_processed=bytes_processed, + cell_execution_count=cell_execution_count, ) self.jobs.append(metadata) From 21d5dd858593aca11ee4fac4191409f23ba8eb5f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 15 May 2026 21:05:33 +0000 Subject: [PATCH 4/6] format code --- packages/bigframes/bigframes/core/global_session.py | 1 - packages/bigframes/bigframes/session/__init__.py | 1 - .../bigframes/tests/unit/session/test_read_gbq_colab.py | 8 ++------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/packages/bigframes/bigframes/core/global_session.py b/packages/bigframes/bigframes/core/global_session.py index c1164cca43be..f9772eed6c88 100644 --- a/packages/bigframes/bigframes/core/global_session.py +++ b/packages/bigframes/bigframes/core/global_session.py @@ -142,7 +142,6 @@ def execution_history( ) - class _GlobalSessionContext: """ Context manager for testing that sets global session. diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 5b6e5c6d0997..6d2bb1e87c51 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -520,7 +520,6 @@ def execution_history( return _ExecutionHistory(jobs) - @property def _allows_ambiguity(self) -> bool: return self._allow_ambiguity diff --git a/packages/bigframes/tests/unit/session/test_read_gbq_colab.py b/packages/bigframes/tests/unit/session/test_read_gbq_colab.py index 49f4a136fbd3..faee3cd9961c 100644 --- a/packages/bigframes/tests/unit/session/test_read_gbq_colab.py +++ b/packages/bigframes/tests/unit/session/test_read_gbq_colab.py @@ -162,12 +162,8 @@ def test_execution_history_filtering(): session = mocks.create_bigquery_session() # Add mock jobs to session metrics - job1 = metrics.JobMetadata( - job_id="job_1", job_type="query", query="SELECT 1" - ) - job2 = metrics.JobMetadata( - job_id="job_2", job_type="query", query="SELECT 2" - ) + job1 = metrics.JobMetadata(job_id="job_1", job_type="query", query="SELECT 1") + job2 = metrics.JobMetadata(job_id="job_2", job_type="query", query="SELECT 2") session._metrics.jobs.extend([job1, job2]) # Verify filtering by job_ids isolates the target execution From 97c726056e5dc4696b34d7edb61433a5bfc105c5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 16 May 2026 00:48:52 +0000 Subject: [PATCH 5/6] fix: avoid mutating extra_query_labels in place --- packages/bigframes/bigframes/session/_io/bigquery/__init__.py | 2 ++ packages/bigframes/bigframes/session/bigquery_session.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/session/_io/bigquery/__init__.py b/packages/bigframes/bigframes/session/_io/bigquery/__init__.py index 7cd7634b08e4..3d273f6de6bf 100644 --- a/packages/bigframes/bigframes/session/_io/bigquery/__init__.py +++ b/packages/bigframes/bigframes/session/_io/bigquery/__init__.py @@ -54,6 +54,8 @@ def create_job_configs_labels( ) -> Dict[str, str]: if job_configs_labels is None: job_configs_labels = {} + else: + job_configs_labels = dict(job_configs_labels) if api_methods and "bigframes-api" not in job_configs_labels: job_configs_labels["bigframes-api"] = api_methods[0] diff --git a/packages/bigframes/bigframes/session/bigquery_session.py b/packages/bigframes/bigframes/session/bigquery_session.py index a39c6136876d..18f8cdeaff49 100644 --- a/packages/bigframes/bigframes/session/bigquery_session.py +++ b/packages/bigframes/bigframes/session/bigquery_session.py @@ -122,7 +122,7 @@ def close(self): # Assume this is being called in the user thread, so we can access # this thread-local config. job_config=bigquery.QueryJobConfig( - labels=bigframes.options.compute.extra_query_labels + labels=dict(bigframes.options.compute.extra_query_labels) ), location=self.location, project=None, From f080dda84c68934137ecf2e3c161c9fd914099a3 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 16 May 2026 00:50:28 +0000 Subject: [PATCH 6/6] format code --- .../bigframes/bigframes/session/__init__.py | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 6d2bb1e87c51..d7ab1d91e8bd 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -581,8 +581,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -598,8 +597,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq( self, @@ -672,8 +670,7 @@ def _read_gbq_colab( callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -683,8 +680,7 @@ def _read_gbq_colab( callback: Optional[Callable[[bigframes.core.events.Event], None]] = ..., pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -753,8 +749,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -770,8 +765,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -918,8 +912,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_table( @@ -933,8 +926,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -1085,8 +1077,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: - ... + ) -> bigframes.core.indexes.Index: ... @typing.overload def read_pandas( @@ -1094,8 +1085,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: - ... + ) -> bigframes.series.Series: ... @typing.overload def read_pandas( @@ -1103,8 +1093,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... def read_pandas( self,