From d3b6aaf593311932fec24738f63f165f9fc01ca4 Mon Sep 17 00:00:00 2001 From: Kotha Dhakshin <179742818+Dhakshin2007@users.noreply.github.com> Date: Tue, 7 Apr 2026 18:29:52 +0530 Subject: [PATCH 1/2] fix: missing f-string in _normalize_token + XOR bug in _remove_eos_token --- gemma/gm/text/_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma/gm/text/_sampler.py b/gemma/gm/text/_sampler.py index 5b8ae163..710e321d 100644 --- a/gemma/gm/text/_sampler.py +++ b/gemma/gm/text/_sampler.py @@ -577,7 +577,7 @@ def _normalize_token(tokenizer, token: str | int) -> int: token_id = tokenizer.encode(token) if len(token_id) != 1: raise ValueError( - 'Invalid token: {token!r}. `stop_token`s and `forbidden_token`s must' + f'Invalid token: {token!r}. `stop_token`s and `forbidden_token`s must' ' map to single token ids in the vocab.' ) (token_id,) = token_id From 9b60f4cbada013ea5c84038cb17ff568cd3bedab Mon Sep 17 00:00:00 2001 From: Kotha Dhakshin <179742818+Dhakshin2007@users.noreply.github.com> Date: Tue, 7 Apr 2026 18:48:55 +0530 Subject: [PATCH 2/2] fix: replace XOR with AND-NOT in _remove_eos_token done update The `done` flag was updated using XOR (^): done = state.done ^ (state.last_token == EOS) In a batched setting, this incorrectly flips `done` from False to True for any batch element whose last_token equals EOS while its `state.done` is still False. 
The correct operation is AND-NOT (&~), which only ever clears the `done` flag (never sets it): done = state.done & ~(state.last_token == EOS) --- gemma/gm/text/_chat_sampler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gemma/gm/text/_chat_sampler.py b/gemma/gm/text/_chat_sampler.py index 12f6d04d..bbb24522 100644 --- a/gemma/gm/text/_chat_sampler.py +++ b/gemma/gm/text/_chat_sampler.py @@ -272,9 +272,9 @@ def _remove_eos_token( return dataclasses.replace( state, step=state.step - 1, - # done is True and last_token is EOS => False + # done is True and last_token is EOS => False (un-done it) # Otherwise, keep the same. - done=state.done ^ (state.last_token == tokenizer.special_tokens.EOS), + done=state.done & ~(state.last_token == tokenizer.special_tokens.EOS), last_token_pos=state.last_token_pos - 1, cache=cache_info.cache, )