From 57cabe0b064576ee0c341331924409f0e5cbaf1e Mon Sep 17 00:00:00 2001 From: Prince Shakya Date: Sun, 24 May 2026 04:38:17 +0530 Subject: [PATCH] fix(utils): download tokenizer models locally via HTTP to avoid TF GCS C++ segfault on macOS --- gemma/gm/utils/_file_cache.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/gemma/gm/utils/_file_cache.py b/gemma/gm/utils/_file_cache.py index e4598139..a7b0597f 100644 --- a/gemma/gm/utils/_file_cache.py +++ b/gemma/gm/utils/_file_cache.py @@ -28,12 +28,24 @@ def maybe_get_from_cache( remote_file_path: epath.PathLike, cache_subdir: str, ) -> epath.Path: - """Returns the cached file if exists, otherwise returns the remote file path.""" - filename = epath.Path(remote_file_path).name + """Returns the cached file if it exists; downloads gs:// files into the cache; otherwise returns the remote file path.""" + remote_path_str = str(remote_file_path) + filename = epath.Path(remote_path_str).name - cache_filepath = _get_cache_dir() / cache_subdir / filename + cache_dir = _get_cache_dir() / cache_subdir + cache_filepath = cache_dir / filename if cache_filepath.exists(): return cache_filepath + + if remote_path_str.startswith('gs://'): + cache_dir.mkdir(parents=True, exist_ok=True) + http_url = remote_path_str.replace('gs://', 'https://storage.googleapis.com/') + print(f"Downloading {filename} to {cache_filepath}...") + + import urllib.request + urllib.request.urlretrieve(http_url, str(cache_filepath)) + return cache_filepath + return epath.Path(remote_file_path)