Skip to content

Commit e3e570c

Browse files
author
Your Name
committed
docs: update flow tutorial to use SklearnExtension.model_to_flow
1 parent 17d690f commit e3e570c

1 file changed

Lines changed: 9 additions & 43 deletions

File tree

Lines changed: 9 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
# %% [markdown]
22
# A simple tutorial on how to upload results from a machine learning experiment to OpenML.
3-
43
# %%
54
import sklearn
65
from sklearn.neighbors import KNeighborsClassifier
7-
86
import openml
9-
7+
from openml_sklearn import SklearnExtension
108
# %% [markdown]
119
# <div class="admonition warning">
1210
# <p class="admonition-title">Warning</p>
@@ -20,68 +18,39 @@
2018
# OpenML-Python API.
2119
# </p>
2220
# </div>
23-
2421
# %%
2522
openml.config.start_using_configuration_for_example()
26-
2723
# %% [markdown]
2824
# ## Train a machine learning model and evaluate it
2925
# NOTE: We are using task 119 from the test server: https://test.openml.org/d/20
30-
3126
# %%
3227
task = openml.tasks.get_task(119)
33-
3428
# Get the data
3529
dataset = task.get_dataset()
3630
X, y, categorical_indicator, attribute_names = dataset.get_data(
3731
target=dataset.default_target_attribute
3832
)
39-
4033
# Get the holdout split from the task
4134
train_indices, test_indices = task.get_train_test_split_indices(fold=0, repeat=0)
4235
X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
4336
y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
44-
45-
knn_parameters = {
46-
"n_neighbors": 3,
47-
}
48-
clf = KNeighborsClassifier(**knn_parameters)
37+
clf = KNeighborsClassifier(n_neighbors=3)
4938
clf.fit(X_train, y_train)
50-
5139
# Get experiment results
5240
y_pred = clf.predict(X_test)
5341
y_pred_proba = clf.predict_proba(X_test)
54-
5542
# %% [markdown]
5643
# ## Upload the machine learning experiments to OpenML
57-
# First, create a flow and fill it with metadata about the machine learning model.
58-
44+
# Create a flow from the trained model using the sklearn extension.
45+
# This automatically extracts all metadata and hyperparameters.
5946
# %%
60-
knn_flow = openml.flows.OpenMLFlow(
61-
# Metadata
62-
model=clf, # or None, if you do not want to upload the model object.
63-
name="CustomKNeighborsClassifier",
64-
description="A custom KNeighborsClassifier flow for OpenML.",
65-
external_version=f"{sklearn.__version__}",
66-
language="English",
67-
tags=["openml_tutorial_knn"],
68-
dependencies=f"{sklearn.__version__}",
69-
# Hyperparameters
70-
parameters={k: str(v) for k, v in knn_parameters.items()},
71-
parameters_meta_info={
72-
"n_neighbors": {"description": "number of neighbors to use", "data_type": "int"}
73-
},
74-
# If you have a pipeline with subcomponents, such as preprocessing, add them here.
75-
components={},
76-
)
47+
extension = SklearnExtension()
48+
knn_flow = extension.model_to_flow(clf)
7749
knn_flow.publish()
7850
print(f"knn_flow was published with the ID {knn_flow.flow_id}")
79-
8051
# %% [markdown]
8152
# Next, we create a run to store the results associated with the flow.
82-
8353
# %%
84-
8554
# Format the predictions for OpenML
8655
predictions = []
8756
for test_index, y_true_i, y_pred_i, y_pred_proba_i in zip(
@@ -98,13 +67,11 @@
9867
proba=dict(zip(task.class_labels, y_pred_proba_i)),
9968
)
10069
)
101-
102-
# Format the parameters for OpenML
70+
# Get parameters from the flow
10371
oml_knn_parameters = [
10472
{"oml:name": k, "oml:value": v, "oml:component": knn_flow.flow_id}
105-
for k, v in knn_parameters.items()
73+
for k, v in knn_flow.parameters.items()
10674
]
107-
10875
knn_run = openml.runs.OpenMLRun(
10976
task_id=task.task_id,
11077
flow_id=knn_flow.flow_id,
@@ -117,6 +84,5 @@
11784
knn_run = knn_run.publish()
11885
print(f"Run was uploaded to {knn_run.openml_url}")
11986
print(f"The flow can be found at {knn_run.flow.openml_url}")
120-
12187
# %%
122-
openml.config.stop_using_configuration_for_example()
88+
openml.config.stop_using_configuration_for_example()

0 commit comments

Comments
 (0)