From f3345603e1d059c3511e716897ecd8452cd841f7 Mon Sep 17 00:00:00 2001
From: Angky William
Date: Fri, 9 Jan 2026 21:41:38 -0800
Subject: [PATCH] normalize length

---
 src/art/dev/train.py              |  3 +++
 src/art/local/backend.py          |  3 +++
 src/art/preprocessing/tokenize.py | 11 ++++++++---
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/art/dev/train.py b/src/art/dev/train.py
index bd415074..7d95d6bf 100644
--- a/src/art/dev/train.py
+++ b/src/art/dev/train.py
@@ -10,6 +10,9 @@ class TrainConfig(TypedDict, total=False):
     positive advantages. Defaults to 0.0 (perfectly balanced)."""
     allow_training_without_logprobs: bool
     epsilon: float  # clip epsilon, using the same name as TRL
+    normalize_by_length: bool
+    """When True (default), divides the loss by response length. Set to False \
+for Dr. GRPO, which removes the length normalization bias."""
     epsilon_high: (
         float | None
     )  # asymmetric clip upper bound. Defaults to epsilon when None
diff --git a/src/art/local/backend.py b/src/art/local/backend.py
index eefde0d8..6d350aee 100644
--- a/src/art/local/backend.py
+++ b/src/art/local/backend.py
@@ -174,6 +174,7 @@ def _get_packed_tensors(
         advantage_balance: float,
         allow_training_without_logprobs: bool,
         scale_rewards: bool,
+        normalize_by_length: bool,
         plot_tensors: bool,
     ) -> PackedTensors | None:
         if model.base_model not in self._tokenizers:
@@ -195,6 +196,7 @@ def _get_packed_tensors(
                 allow_training_without_logprobs,
                 scale_rewards,
                 image_processor=self._image_processors[model.base_model],
+                normalize_by_length=normalize_by_length,
             )
         )
         if not tokenized_results:
@@ -458,6 +460,7 @@ async def _train_model(
                 "allow_training_without_logprobs", False
             ),
             scale_rewards=dev_config.get("scale_rewards", True),
+            normalize_by_length=dev_config.get("normalize_by_length", True),
             plot_tensors=dev_config.get("plot_tensors", False),
         )
         if packed_tensors is None:
diff --git a/src/art/preprocessing/tokenize.py b/src/art/preprocessing/tokenize.py
index c67db9c3..bc131b33 100644
--- a/src/art/preprocessing/tokenize.py
+++ b/src/art/preprocessing/tokenize.py
@@ -51,6 +51,7 @@ def tokenize_trajectory_groups(
     scale_rewards: bool,
     shuffle_group_trajectories: bool = True,
     image_processor: BaseImageProcessor | None = None,
+    normalize_by_length: bool = True,
 ) -> Generator["TokenizedResult", None, None]:
     for group in trajectory_groups:
         if not group:
@@ -86,9 +87,13 @@ def tokenize_trajectory_groups(
                 allow_training_without_logprobs,
             ):
                 trajectory_results.append(result)
-        weight = 1 / (
-            sum(sum(result.assistant_mask) for result in trajectory_results) + 1e-6
-        )
+        if normalize_by_length:
+            weight = 1 / (
+                sum(sum(result.assistant_mask) for result in trajectory_results)
+                + 1e-6
+            )
+        else:
+            weight = 1.0
         for result in trajectory_results:
             result.weight = weight
         results.extend(trajectory_results)
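
Usage sketch: the new flag flows from the dev-side TrainConfig through the
backend's _train_model into tokenize_trajectory_groups. Below is a minimal
illustration of toggling it, assuming TrainConfig is imported from
art.dev.train (matching src/art/dev/train.py above); how the config dict
reaches _train_model depends on the caller's backend setup.

    from art.dev.train import TrainConfig

    # Default GRPO-style behavior: every trajectory in a group receives
    # weight = 1 / (total assistant tokens in the group), so the loss is
    # normalized by response length.
    grpo_config: TrainConfig = {"normalize_by_length": True}

    # Dr. GRPO-style behavior: weight = 1.0 for every trajectory, removing
    # the length normalization bias.
    dr_grpo_config: TrainConfig = {"normalize_by_length": False}

Since TrainConfig is a TypedDict with total=False, omitting the key keeps the
default: backend.py reads dev_config.get("normalize_by_length", True).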