From 38fcbc0625728225621ce09cf5f4b1877160f9eb Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Wed, 25 Mar 2026 14:39:19 +0530 Subject: [PATCH 1/4] Improve docstring for PGBKCVOperation --- sdks/python/apache_beam/runners/worker/operations.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index d0f7cceb558f..a83ccec1b8c1 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -1190,9 +1190,10 @@ def create_pgbk_op(step_name, spec, counter_factory, state_sampler): class PGBKOperation(Operation): """Partial group-by-key operation. - This takes (windowed) input (key, value) tuples and outputs - (key, [value]) tuples, performing a best effort group-by-key for - values in this bundle, memory permitting. + This operations groups elements by key within a + bundle. + It stores values temporarily and flushes based + on size. """ def __init__(self, name_context, spec, counter_factory, state_sampler): super(PGBKOperation, From 247d2f840a25488324fcf9f7e616f96cb1fcb3b7 Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Wed, 25 Mar 2026 15:44:17 +0530 Subject: [PATCH 2/4] Fix docstring is satisfy lint --- .gitignore | 8 ++++++++ sdks/python/apache_beam/runners/worker/operations.py | 11 +++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index e9fe331cb316..8569920676b5 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,11 @@ playground/cloudfunction.zip # as its generated with terraform .test-infra/metrics/sync/github/github_runs_prefetcher/code.zip .venv/ + + +beam-env/ +build/ +*.pyc +__pycache__/ +*.pyd +*.egg-info/ diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index a83ccec1b8c1..90e2cfb44547 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -1188,13 +1188,12 @@ def create_pgbk_op(step_name, spec, counter_factory, state_sampler): class PGBKOperation(Operation): - """Partial group-by-key operation. +"""Partial group-by-key operation. - This operations groups elements by key within a - bundle. - It stores values temporarily and flushes based - on size. - """ +This takes (windowed) input (key, value) tuples and outputs +(key, [value]) tuples, performing a best-effort group-by-key +for values in this bundle, memory permitting. +""" def __init__(self, name_context, spec, counter_factory, state_sampler): super(PGBKOperation, self).__init__(name_context, spec, counter_factory, state_sampler) From ff489b5ef4f70f6326f37bc02fa7a22945cac574 Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Wed, 25 Mar 2026 16:27:21 +0530 Subject: [PATCH 3/4] Add costring for PGBKCVOperation --- .../transforms/aggregation/groupby_test.py | 1 - .../apache_beam/runners/worker/operations.py | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_test.py index 0d2e5dfe25ad..cc3fd5c3ccd1 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_test.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_test.py @@ -39,7 +39,6 @@ from .groupby_two_exprs import groupby_two_exprs - class UnorderedList(object): def __init__(self, contents): self._contents = list(contents) diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index 90e2cfb44547..f96eb3728717 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -1188,12 +1188,12 @@ def create_pgbk_op(step_name, spec, counter_factory, state_sampler): class PGBKOperation(Operation): -"""Partial group-by-key operation. + """Partial group-by-key operation. -This takes (windowed) input (key, value) tuples and outputs -(key, [value]) tuples, performing a best-effort group-by-key -for values in this bundle, memory permitting. -""" + This takes (windowed) input (key, value) tuples and outputs + (key, [value]) tuples, performing a best effort group-by-key for + values in this bundle, memory permitting. + """ def __init__(self, name_context, spec, counter_factory, state_sampler): super(PGBKOperation, self).__init__(name_context, spec, counter_factory, state_sampler) @@ -1234,6 +1234,11 @@ def flush(self, target): class PGBKCVOperation(Operation): + """Partial group-by-key operation. + + This operation handles grouped values with +a combine function applied. + """ def __init__( self, name_context, spec, counter_factory, state_sampler, windowing=None): super(PGBKCVOperation, From 85e8ffdcb38b23404b1cb371d16aaedc2bb4d025 Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Thu, 26 Mar 2026 11:45:21 +0530 Subject: [PATCH 4/4] Revert untended .gitignore changes --- .gitignore | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.gitignore b/.gitignore index 8569920676b5..e9fe331cb316 100644 --- a/.gitignore +++ b/.gitignore @@ -162,11 +162,3 @@ playground/cloudfunction.zip # as its generated with terraform .test-infra/metrics/sync/github/github_runs_prefetcher/code.zip .venv/ - - -beam-env/ -build/ -*.pyc -__pycache__/ -*.pyd -*.egg-info/