From fee107ada1f67572fd2849cfeabb3d3b5b333289 Mon Sep 17 00:00:00 2001 From: aIbrahiim Date: Mon, 16 Mar 2026 14:22:11 +0200 Subject: [PATCH] Fix vLLM Gemma benchmark by updating transformers version --- ...m_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt | 1 + .../examples/inference/pytorch_language_modeling.py | 3 +-- sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py | 1 - .../apache_beam/ml/inference/vllm_tests_requirements.txt | 3 ++- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt index 23af8197d8d4..fd2101afa3f1 100644 --- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt +++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt @@ -32,5 +32,6 @@ --metrics_table=gemma_vllm_batch --influx_measurement=gemma_vllm_batch --model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it +--requirements_file=apache_beam/ml/inference/vllm_tests_requirements.txt --dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver --experiments=use_runner_v2 diff --git a/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py b/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py index 946c4fadd113..d995df09a159 100644 --- a/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py +++ b/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py @@ -52,8 +52,7 @@ def tokenize_sentence( text_and_mask: tuple[str, str], bert_tokenizer: BertTokenizer) -> tuple[str, dict[str, torch.Tensor]]: text, masked_text = text_and_mask - tokenized_sentence = bert_tokenizer.encode_plus( - masked_text, return_tensors="pt") + tokenized_sentence = bert_tokenizer(masked_text, return_tensors="pt") # Workaround to manually remove batch dim until we have the feature to # add optional batching flag. diff --git a/sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py b/sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py index f6e33e5be786..c30e8991d665 100644 --- a/sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py +++ b/sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py @@ -103,7 +103,6 @@ def run(argv=None, save_main_session=True, test_pipeline=None): gem = opts.view_as(GemmaVLLMOptions) opts.view_as(SetupOptions).save_main_session = save_main_session - logging.info("Pipeline starting with model path: %s", gem.model_gcs_path) handler = GcsVLLMCompletionsModelHandler( model_name=gem.model_gcs_path, diff --git a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt index 0f8c6a6a673d..ad5877edbec5 100644 --- a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt @@ -17,6 +17,7 @@ torch>=1.7.1 torchvision>=0.8.2 pillow>=8.0.0 -transformers>=4.18.0 +transformers==4.57.1 +sentencepiece==0.2.1 google-cloud-monitoring>=2.27.0 openai>=1.52.2