11# %% [markdown]
22# A simple tutorial on how to upload results from a machine learning experiment to OpenML.
3-
43# %%
54import sklearn
65from sklearn .neighbors import KNeighborsClassifier
7-
86import openml
9-
7+ from openml_sklearn import SklearnExtension
108# %% [markdown]
119# <div class="admonition warning">
1210# <p class="admonition-title">Warning</p>
2018# OpenML-Python API.
2119# </p>
2220# </div>
23-
2421# %%
2522openml .config .start_using_configuration_for_example ()
26-
2723# %% [markdown]
2824# ## Train a machine learning model and evaluate it
2925# NOTE: We are using task 119 from the test server: https://test.openml.org/d/20
30-
3126# %%
3227task = openml .tasks .get_task (119 )
33-
3428# Get the data
3529dataset = task .get_dataset ()
3630X , y , categorical_indicator , attribute_names = dataset .get_data (
3731 target = dataset .default_target_attribute
3832)
39-
4033# Get the holdout split from the task
4134train_indices , test_indices = task .get_train_test_split_indices (fold = 0 , repeat = 0 )
4235X_train , X_test = X .iloc [train_indices ], X .iloc [test_indices ]
4336y_train , y_test = y .iloc [train_indices ], y .iloc [test_indices ]
44-
45- knn_parameters = {
46- "n_neighbors" : 3 ,
47- }
48- clf = KNeighborsClassifier (** knn_parameters )
37+ clf = KNeighborsClassifier (n_neighbors = 3 )
4938clf .fit (X_train , y_train )
50-
5139# Get experiment results
5240y_pred = clf .predict (X_test )
5341y_pred_proba = clf .predict_proba (X_test )
54-
5542# %% [markdown]
5643# ## Upload the machine learning experiments to OpenML
57- # First, create a fow and fill it with metadata about the machine learning model .
58-
44+ # Create a flow from the trained model using the sklearn extension .
45+ # This automatically extracts all metadata and hyperparameters.
5946# %%
60- knn_flow = openml .flows .OpenMLFlow (
61- # Metadata
62- model = clf , # or None, if you do not want to upload the model object.
63- name = "CustomKNeighborsClassifier" ,
64- description = "A custom KNeighborsClassifier flow for OpenML." ,
65- external_version = f"{ sklearn .__version__ } " ,
66- language = "English" ,
67- tags = ["openml_tutorial_knn" ],
68- dependencies = f"{ sklearn .__version__ } " ,
69- # Hyperparameters
70- parameters = {k : str (v ) for k , v in knn_parameters .items ()},
71- parameters_meta_info = {
72- "n_neighbors" : {"description" : "number of neighbors to use" , "data_type" : "int" }
73- },
74- # If you have a pipeline with subcomponents, such as preprocessing, add them here.
75- components = {},
76- )
47+ extension = SklearnExtension ()
48+ knn_flow = extension .model_to_flow (clf )
7749knn_flow .publish ()
7850print (f"knn_flow was published with the ID { knn_flow .flow_id } " )
79-
8051# %% [markdown]
8152# Next, we create a run to store the results associated with the flow.
82-
8353# %%
84-
8554# Format the predictions for OpenML
8655predictions = []
8756for test_index , y_true_i , y_pred_i , y_pred_proba_i in zip (
9867 proba = dict (zip (task .class_labels , y_pred_proba_i )),
9968 )
10069 )
101-
102- # Format the parameters for OpenML
70+ # Get parameters from the flow
10371oml_knn_parameters = [
10472 {"oml:name" : k , "oml:value" : v , "oml:component" : knn_flow .flow_id }
105- for k , v in knn_parameters .items ()
73+ for k , v in knn_flow . parameters .items ()
10674]
107-
10875knn_run = openml .runs .OpenMLRun (
10976 task_id = task .task_id ,
11077 flow_id = knn_flow .flow_id ,
11784knn_run = knn_run .publish ()
11885print (f"Run was uploaded to { knn_run .openml_url } " )
11986print (f"The flow can be found at { knn_run .flow .openml_url } " )
120-
12187# %%
122- openml .config .stop_using_configuration_for_example ()
88+ openml .config .stop_using_configuration_for_example ()
0 commit comments