probabl-ai · glemaitre · Oct 3, 2025 · Oct 3, 2025 · Oct 3, 2025
diff --git a/content/python_files/feature_engineering.py b/content/python_files/feature_engineering.py
@@ -28,23 +28,21 @@
 # ## Environment setup
 #
 # We need to install some extra dependencies for this notebook if needed (when
-# running jupyterlite). We need the development version of skrub to be able to
-# use the skrub expressions.
+# running jupyterlite).
 
 # %%
 # %pip install -q https://pypi.anaconda.org/ogrisel/simple/polars/1.24.0/polars-1.24.0-cp39-abi3-emscripten_3_1_58_wasm32.whl
-# %pip install -q https://pypi.anaconda.org/ogrisel/simple/skrub/0.6.dev0/skrub-0.6.dev0-py3-none-any.whl
-# %pip install -q altair holidays plotly nbformat
+# %pip install -q skrub altair holidays plotly nbformat
 
 # %% [markdown]
 #
 # The following 3 imports are only needed to workaround some limitations when
 # using polars in a pyodide/jupyterlite notebook.
 #
-# TODO: remove those workarounds once pyodide 0.28 is released with support for
-# the latest polars version.
+# TODO: remove those workarounds once pyodide re-enables the polars package,
+# xref: https://github.com/pyodide/pyodide-recipes/blob/0.29.X/packages/polars/meta.yaml
 
-# %%
+# %%
 import tzdata  # noqa: F401
 import pandas as pd
 from pyarrow.parquet import read_table

diff --git a/content/python_files/multiple_horizons_prediction.py b/content/python_files/multiple_horizons_prediction.py
@@ -5,13 +5,11 @@
 # ## Environment setup
 #
 # We need to install some extra dependencies for this notebook if needed (when
-# running jupyterlite). We need the development version of skrub to be able to
-# use the skrub expressions.
+# running jupyterlite).
 
 # %%
 # %pip install -q https://pypi.anaconda.org/ogrisel/simple/polars/1.24.0/polars-1.24.0-cp39-abi3-emscripten_3_1_58_wasm32.whl
-# %pip install -q https://pypi.anaconda.org/ogrisel/simple/skrub/0.6.dev0/skrub-0.6.dev0-py3-none-any.whl
-# %pip install -q altair holidays plotly nbformat
+# %pip install -q skrub altair holidays plotly nbformat
 
 # %%
 import datetime

diff --git a/content/python_files/parallel_coordinates_hgbr.json b/content/python_files/parallel_coordinates_hgbr.json
diff --git a/content/python_files/parallel_coordinates_ridge.json b/content/python_files/parallel_coordinates_ridge.json
diff --git a/content/python_files/prediction_intervals.py b/content/python_files/prediction_intervals.py
@@ -5,13 +5,11 @@
 # ## Environment setup
 #
 # We need to install some extra dependencies for this notebook if needed (when
-# running jupyterlite). We need the development version of skrub to be able to
-# use the skrub expressions.
+# running jupyterlite).
 
 # %%
 # %pip install -q https://pypi.anaconda.org/ogrisel/simple/polars/1.24.0/polars-1.24.0-cp39-abi3-emscripten_3_1_58_wasm32.whl
-# %pip install -q https://pypi.anaconda.org/ogrisel/simple/skrub/0.6.dev0/skrub-0.6.dev0-py3-none-any.whl
-# %pip install -q altair holidays plotly nbformat
+# %pip install -q skrub altair holidays plotly nbformat
 
 # %%
 import warnings
@@ -128,21 +126,21 @@
 cv_results_hgbr_05 = predictions_hgbr_05.skb.cross_validate(
     cv=ts_cv_5,
     scoring=scoring,
-    return_pipeline=True,
+    return_learner=True,
     verbose=1,
     n_jobs=-1,
 )
 cv_results_hgbr_50 = predictions_hgbr_50.skb.cross_validate(
     cv=ts_cv_5,
     scoring=scoring,
-    return_pipeline=True,
+    return_learner=True,
     verbose=1,
     n_jobs=-1,
 )
 cv_results_hgbr_95 = predictions_hgbr_95.skb.cross_validate(
     cv=ts_cv_5,
     scoring=scoring,
-    return_pipeline=True,
+    return_learner=True,
     verbose=1,
     n_jobs=-1,
 )
@@ -153,13 +151,13 @@
 
 # %%
 cv_predictions_hgbr_05 = collect_cv_predictions(
-    cv_results_hgbr_05["pipeline"], ts_cv_5, predictions_hgbr_05, prediction_time
+    cv_results_hgbr_05["learner"], ts_cv_5, predictions_hgbr_05, prediction_time
 )
 cv_predictions_hgbr_50 = collect_cv_predictions(
-    cv_results_hgbr_50["pipeline"], ts_cv_5, predictions_hgbr_50, prediction_time
+    cv_results_hgbr_50["learner"], ts_cv_5, predictions_hgbr_50, prediction_time
 )
 cv_predictions_hgbr_95 = collect_cv_predictions(
-    cv_results_hgbr_95["pipeline"], ts_cv_5, predictions_hgbr_95, prediction_time
+    cv_results_hgbr_95["learner"], ts_cv_5, predictions_hgbr_95, prediction_time
 )
 
 # %% [markdown]

diff --git a/content/python_files/single_horizon_prediction.py b/content/python_files/single_horizon_prediction.py
@@ -5,13 +5,11 @@
 # ## Environment setup
 #
 # We need to install some extra dependencies for this notebook if needed (when
-# running jupyterlite). We need the development version of skrub to be able to
-# use the skrub expressions.
+# running jupyterlite).
 
 # %%
 # %pip install -q https://pypi.anaconda.org/ogrisel/simple/polars/1.24.0/polars-1.24.0-cp39-abi3-emscripten_3_1_58_wasm32.whl
-# %pip install -q https://pypi.anaconda.org/ogrisel/simple/skrub/0.6.dev0/skrub-0.6.dev0-py3-none-any.whl
-# %pip install -q altair holidays plotly nbformat
+# %pip install -q skrub altair holidays plotly nbformat
 
 # %%
 import warnings
@@ -51,6 +49,7 @@
 #
 # For now, let's focus on the last horizon (24 hours) to train a model
 # predicting the electricity load at the next 24 hours.
+
 # %%
 horizon_of_interest = horizons[-1]  # Focus on the 24-hour horizon
 target_column_name = target_column_name_pattern.format(horizon=horizon_of_interest)
@@ -129,7 +128,7 @@
 # follows:
 
 # %%
-hgbr_pipeline = hgbr_predictions.skb.get_pipeline()
+hgbr_pipeline = hgbr_predictions.skb.make_learner()
 hgbr_pipeline.describe_params()
 
 # %% [markdown]
@@ -138,7 +137,7 @@
 # the steps of the DAG using the following (once uncommented):
 
 # %%
-# predictions.skb.full_report()
+# hgbr_predictions.skb.full_report()
 
 # %% [markdown]
 #
@@ -234,7 +233,7 @@
         "d2_gamma": make_scorer(d2_tweedie_score, power=2.0),
     },
     return_train_score=True,
-    return_pipeline=True,
+    return_learner=True,
     verbose=1,
     n_jobs=-1,
 )
@@ -264,7 +263,7 @@
 
 # %%
 hgbr_cv_predictions = collect_cv_predictions(
-    hgbr_cv_results["pipeline"], ts_cv_5, hgbr_predictions, prediction_time
+    hgbr_cv_results["learner"], ts_cv_5, hgbr_predictions, prediction_time
 )
 hgbr_cv_predictions[0]
 
@@ -356,15 +355,16 @@
 ts_cv_2 = TimeSeriesSplit(
     n_splits=2, test_size=test_size, max_train_size=max_train_size, gap=24
 )
-# randomized_search_hgbr = hgbr_predictions.skb.get_randomized_search(
+# randomized_search_hgbr = hgbr_predictions.skb.make_randomized_search(
 #     cv=ts_cv_2,
 #     scoring="r2",
 #     n_iter=100,
 #     fitted=True,
 #     verbose=1,
 #     n_jobs=-1,
 # )
-# # %%
+
+# %%
 # randomized_search_hgbr.results_.round(3)
 
 # %%
@@ -377,26 +377,26 @@
 
 # %%
 # nested_cv_results = skrub.cross_validate(
-#     environment=predictions.skb.get_data(),
-#     pipeline=randomized_search,
+#     environment=hgbr_predictions.skb.get_data(),
+#     learner=randomized_search_hgbr,
 #     cv=ts_cv_5,
 #     scoring={
 #         "r2": get_scorer("r2"),
 #         "mape": make_scorer(mean_absolute_percentage_error),
 #     },
 #     n_jobs=-1,
-#     return_pipeline=True,
+#     return_learner=True,
 # ).round(3)
 # nested_cv_results
 
 # %%
 # for outer_fold_idx in range(len(nested_cv_results)):
 #     print(
-#         nested_cv_results.loc[outer_fold_idx, "pipeline"]
-#         .results_.loc[0]
+#         nested_cv_results.loc[outer_fold_idx, "learner"]
+#         .results_.loc[:, "mean_test_score"]
 #         .round(3)
 #         .to_dict()
-#     )
+#     )
 
 # %% [markdown]
 #
@@ -501,7 +501,7 @@
         "mape": make_scorer(mean_absolute_percentage_error),
     },
     return_train_score=True,
-    return_pipeline=True,
+    return_learner=True,
     verbose=1,
     n_jobs=-1,
 )
@@ -533,7 +533,7 @@
 
 # %%
 cv_predictions_ridge = collect_cv_predictions(
-    cv_results_ridge["pipeline"], ts_cv_5, predictions_ridge, prediction_time
+    cv_results_ridge["learner"], ts_cv_5, predictions_ridge, prediction_time
 )
 
 # %%
@@ -578,7 +578,7 @@
 # expensive, we are reloading the results of the parallel coordinates plot.
 
 # %%
-# randomized_search_ridge = predictions_ridge.skb.get_randomized_search(
+# randomized_search_ridge = predictions_ridge.skb.make_randomized_search(
 #     cv=ts_cv_2,
 #     scoring="r2",
 #     n_iter=100,
@@ -610,14 +610,14 @@
 # %%
 # nested_cv_results_ridge = skrub.cross_validate(
 #     environment=predictions_ridge.skb.get_data(),
-#     pipeline=randomized_search_ridge,
+#     learner=randomized_search_ridge,
 #     cv=ts_cv_5,
 #     scoring={
 #         "r2": get_scorer("r2"),
 #         "mape": make_scorer(mean_absolute_percentage_error),
 #     },
 #     n_jobs=-1,
-#     return_pipeline=True,
+#     return_learner=True,
 # ).round(3)
 
 # %%

diff --git a/content/python_files/tutorial_helpers.py b/content/python_files/tutorial_helpers.py
@@ -698,7 +698,7 @@ def collect_cv_predictions(
 ):
     index_generator = cv_splitter.split(prediction_time.skb.eval())
 
-    def splitter(X, y, index_generator):
+    def split_func(X, y, index_generator):
         """Workaround to transform a scikit-learn splitter into a function understood
         by `skrub.train_test_split`."""
         train_idx, test_idx = next(index_generator)
@@ -711,7 +711,7 @@ def splitter(X, y, index_generator):
     ):
         split = predictions.skb.train_test_split(
             predictions.skb.get_data(),
-            splitter=splitter,
+            split_func=split_func,
             index_generator=index_generator,
         )
         results.append(