Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
560 changes: 560 additions & 0 deletions source/examples/cuml-ray-hpo/notebook.ipynb

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions source/examples/cuml-ray-hpo/setup/env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Conda environment for the cuML + Ray HPO example.
name: ray-cuml
channels:
- rapidsai
- conda-forge
dependencies:
- python=3.13
# Every Ray component is pinned to the same release (2.53.0).
- "ray-default=2.53.0"
- "ray-data=2.53.0"
- "ray-train=2.53.0"
- "ray-tune=2.53.0"
- cuml=26.02
# NOTE(review): this pins CUDA 13.0, whereas setup/pyproject.toml depends on
# "cuml-cu12" (presumably the CUDA 12 wheel) - confirm the two install paths
# are intentionally targeting different CUDA major versions.
- "cuda-version=13.0"
# Jupyter kernel and widget support for running the notebook.
- ipykernel
- ipywidgets
71 changes: 71 additions & 0 deletions source/examples/cuml-ray-hpo/setup/get_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import argparse
import os
from urllib.request import urlretrieve

# Decide where downloaded data lives. When this script sits in a directory
# named "setup", the data folder is a sibling of that directory; from any
# other location it falls back to "<current working directory>/data".
_script_dir = os.path.dirname(os.path.abspath(__file__))
_data_dir = os.path.join(
    os.path.dirname(_script_dir)
    if os.path.basename(_script_dir) == "setup"
    else os.getcwd(),
    "data",
)


def prepare_dataset(use_full_dataset=False, data_dir=None):
    """
    Download the airline dataset if it is not already present.

    The file is first written to ``<target>.part`` and only moved to its
    final name once the download has finished, so an interrupted or failed
    download is never mistaken for a complete dataset on the next run.

    Parameters
    ----------
    use_full_dataset : bool, default=False
        If True, downloads the full dataset (20M rows).
        If False, downloads the small dataset.
    data_dir : str or None, default=None
        Directory in which the parquet file is stored. When None, the
        module-level default data directory is used.

    Returns
    -------
    str
        Path of the parquet file on disk.

    Raises
    ------
    Exception
        Any error raised by ``urlretrieve`` (network failure, truncated
        transfer, ...) is propagated after the partial file is removed.
    """
    if data_dir is None:
        data_dir = _data_dir

    # Set filename and source URL based on dataset size
    if use_full_dataset:
        file_name = "airlines.parquet"
        url = "https://data.rapids.ai/cloud-ml/airline_20000000.parquet"
    else:
        file_name = "airlines_small.parquet"
        url = "https://data.rapids.ai/cloud-ml/airline_small.parquet"

    parquet_name = os.path.join(data_dir, file_name)

    if os.path.isfile(parquet_name):
        print(f" > File already exists. Ready to load at {parquet_name}")
        return parquet_name

    # Ensure folder exists
    os.makedirs(data_dir, exist_ok=True)

    def data_progress_hook(block_number, read_size, total_filesize):
        # Only report every 1000th block to keep the output readable.
        if block_number % 1000 != 0:
            return
        downloaded = block_number * read_size
        if total_filesize > 0:
            # The last block usually overshoots the real size, so clamp.
            percent = min(100.0, 100 * downloaded / total_filesize)
            print(f" > percent complete: {percent:.2f}\r", end="")
        else:
            # The total size is unknown when the server does not report it;
            # show the amount transferred instead of a bogus percentage.
            print(f" > downloaded: {downloaded / 1e6:.1f} MB\r", end="")

    partial_name = parquet_name + ".part"
    try:
        urlretrieve(
            url=url,
            filename=partial_name,
            reporthook=data_progress_hook,
        )
    except BaseException:
        # Includes KeyboardInterrupt: never leave a truncated file behind.
        if os.path.exists(partial_name):
            os.remove(partial_name)
        raise

    # Publish the finished file under its final name in a single step.
    os.replace(partial_name, parquet_name)

    print(f" > Download complete {file_name}")
    return parquet_name


if __name__ == "__main__":
    # Command-line entry point: a single optional flag selects the full
    # dataset; without it the small dataset is downloaded.
    cli = argparse.ArgumentParser(
        description="Download airline dataset for cuML Ray HPO example"
    )
    cli.add_argument(
        "--full-dataset",
        help="Download the full dataset (20M rows) instead of the small dataset",
        action="store_true",
    )
    options = cli.parse_args()
    want_full = options.full_dataset

    prepare_dataset(use_full_dataset=want_full)
14 changes: 14 additions & 0 deletions source/examples/cuml-ray-hpo/setup/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Python project metadata for the cuML + Ray HPO example (pip-style install).
[project]
name = "ray-cuml"
version = "0.1.0"
requires-python = "==3.13.*"
dependencies = [
# Every Ray extra is pinned to the same release (2.53.0).
"ray[default]==2.53.0",
"ray[data]==2.53.0",
"ray[train]==2.53.0",
"ray[tune]==2.53.0",
# NOTE(review): "cuml-cu12" presumably targets CUDA 12, whereas setup/env.yaml
# pins cuda-version=13.0 - confirm the mismatch between the two is intended.
"cuml-cu12==26.2.*",
# NOTE(review): jupyterlab-nvdashboard is listed here but not in
# setup/env.yaml - confirm whether the conda environment should include it.
"jupyterlab-nvdashboard",
"ipykernel",
"ipywidgets"
]
1 change: 1 addition & 0 deletions source/examples/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ cuml-snowflake-nb/notebook
rapids-coiled-cudf/notebook
rapids-morpheus-pipeline/notebook
lulc-classification-gpu/notebook
cuml-ray-hpo/notebook
```