From 52d04e0b3dab816f6f774b71bde7978c386a79ab Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Wed, 11 Feb 2026 17:42:01 +0100 Subject: [PATCH 1/3] Update file download implementation Refactor download implementation to use resolved path from hffs. --- streaming/base/storage/download.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/streaming/base/storage/download.py b/streaming/base/storage/download.py index 6e437613a..b5e98a363 100644 --- a/streaming/base/storage/download.py +++ b/streaming/base/storage/download.py @@ -502,16 +502,17 @@ def clean_up(self) -> None: def _download_file_impl(self, remote: str, local: str, timeout: float) -> None: """Implementation of the download function for a file.""" - from huggingface_hub import hf_hub_download + from huggingface_hub import hf_hub_download, hffs - _, _, _, repo_org, repo_name, path = remote.split('/', 5) + resolved_path = hffs.resolve_path(remote) local_dirname = os.path.dirname(local) - hf_hub_download(repo_id=f'{repo_org}/{repo_name}', - filename=path, - repo_type='dataset', + hf_hub_download(repo_id=resolved_path.repo_id, + filename=resolved_path.path_in_repo, + repo_type=resolved_path.repo_type, + revision=resolved_path.revision, local_dir=local_dirname) - downloaded_name = os.path.join(local_dirname, path) + downloaded_name = os.path.join(local_dirname, resolved_path.path_in_repo) os.rename(downloaded_name, local) From bacf9d80c6a6af3efb776590e8ccd96cf016159c Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Wed, 11 Feb 2026 17:52:04 +0100 Subject: [PATCH 2/3] use hffs.download for simplicity --- streaming/base/storage/download.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/streaming/base/storage/download.py b/streaming/base/storage/download.py index b5e98a363..13a95ffa5 100644 --- a/streaming/base/storage/download.py +++ b/streaming/base/storage/download.py @@ -502,18 +502,9 @@ def clean_up(self) -> None: def _download_file_impl(self, remote: str, local: str, timeout: float) -> None: """Implementation of the download function for a file.""" - from huggingface_hub import hf_hub_download, hffs - - resolved_path = hffs.resolve_path(remote) - local_dirname = os.path.dirname(local) - hf_hub_download(repo_id=resolved_path.repo_id, - filename=resolved_path.path_in_repo, - repo_type=resolved_path.repo_type, - revision=resolved_path.revision, - local_dir=local_dirname) - - downloaded_name = os.path.join(local_dirname, resolved_path.path_in_repo) - os.rename(downloaded_name, local) + from huggingface_hub import hffs + + hffs.download(remote, local) class AzureDownloader(CloudDownloader): From f3a0ccb3a87ad52903617f1992eedb3e277549a6 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Wed, 11 Feb 2026 17:56:48 +0100 Subject: [PATCH 3/3] Update huggingface_hub dependency version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ff8420800..ff352109c 100644 --- a/setup.py +++ b/setup.py @@ -126,7 +126,7 @@ ] extra_deps['hf'] = [ - 'huggingface_hub>=0.23.4,<1.4', + 'huggingface_hub>=1.2.1,<1.4', ] extra_deps['testing'] = [