From 082a6067b8475e7f199d07b15172a2b5feb623a4 Mon Sep 17 00:00:00 2001
From: Deepti Mokkapati <deeptim@microsoft.com>
Date: Tue, 9 Feb 2021 15:12:19 -0800
Subject: [PATCH 1/4] update many model script to create child runs

---
 Custom_Script/scripts/train.py | 136 ++++++++++++++++++---------------
 1 file changed, 75 insertions(+), 61 deletions(-)

diff --git a/Custom_Script/scripts/train.py b/Custom_Script/scripts/train.py
index bc641dc4..652ed855 100644
--- a/Custom_Script/scripts/train.py
+++ b/Custom_Script/scripts/train.py
@@ -59,67 +59,81 @@ def run(input_data):
         train = data[:-args.test_size]
         test = data[-args.test_size:]
 
-        # 3.0 Create and fit the forecasting pipeline
-        # The pipeline will drop unhelpful features, make a calendar feature, and make lag features
-        lagger = SimpleLagger(args.target_column, lag_orders=[1, 2, 3, 4])
-        transform_steps = [('column_dropper', ColumnDropper(args.drop_columns)),
-                           ('calendar_featurizer', SimpleCalendarFeaturizer()), ('lagger', lagger)]
-        forecaster = SimpleForecaster(transform_steps, LinearRegression(), args.target_column, args.timestamp_column)
-        forecaster.fit(train)
-        print('Featurized data example:')
-        print(forecaster.transform(train).head())
-
-        # 4.0 Get predictions on test set
-        forecasts = forecaster.forecast(test)
-        compare_data = test.assign(forecasts=forecasts).dropna()
-
-        # 5.0 Calculate accuracy metrics for the fit
-        mse = mean_squared_error(compare_data[args.target_column], compare_data['forecasts'])
-        rmse = np.sqrt(mse)
-        mae = mean_absolute_error(compare_data[args.target_column], compare_data['forecasts'])
-        actuals = compare_data[args.target_column].values
-        preds = compare_data['forecasts'].values
-        mape = np.mean(np.abs((actuals - preds) / actuals) * 100)
-
-        # 6.0 Log metrics
-        current_run.log(model_name + '_mse', mse)
-        current_run.log(model_name + '_rmse', rmse)
-        current_run.log(model_name + '_mae', mae)
-        current_run.log(model_name + '_mape', mape)
-
-        # 7.0 Train model with full dataset
-        forecaster.fit(data)
-
-        # 8.0 Save the forecasting pipeline
-        joblib.dump(forecaster, filename=os.path.join('./outputs/', model_name))
-
-        # 9.0 Register the model to the workspace
-        # Uses the values in the timeseries id columns from the first row of data to form tags for the model
-        current_run.upload_file(model_name, os.path.join('./outputs/', model_name))
-        ts_id_dict = {id_col: str(data[id_col].iloc[0]) for id_col in args.timeseries_id_columns}
-        tags_dict = {**ts_id_dict, 'ModelType': args.model_type}
-        current_run.register_model(model_path=model_name, model_name=model_name,
-                                   model_framework=args.model_type, tags=tags_dict)
-
-        # 10.0 Add data to output
-        end_datetime = datetime.datetime.now()
-        result.update(ts_id_dict)
-        result['model_type'] = args.model_type
-        result['file_name'] = file_name
-        result['model_name'] = model_name
-        result['start_date'] = str(start_datetime)
-        result['end_date'] = str(end_datetime)
-        result['duration'] = str(end_datetime-start_datetime)
-        result['mse'] = mse
-        result['rmse'] = rmse
-        result['mae'] = mae
-        result['mape'] = mape
-        result['index'] = idx
-        result['num_models'] = len(input_data)
-        result['status'] = current_run.get_status()
-
-        print('ending (' + csv_file_path + ') ' + str(end_datetime))
-        result_list.append(result)
+        try:
+            child_run = current_run.child_run(name=model_name)
+
+            # 3.0 Create and fit the forecasting pipeline
+            # The pipeline will drop unhelpful features, make a calendar feature, and make lag features
+            lagger = SimpleLagger(args.target_column, lag_orders=[1, 2, 3, 4])
+            transform_steps = [('column_dropper', ColumnDropper(args.drop_columns)),
+                               ('calendar_featurizer', SimpleCalendarFeaturizer()), ('lagger', lagger)]
+            forecaster = SimpleForecaster(transform_steps, LinearRegression(), args.target_column,
+                                          args.timestamp_column)
+            forecaster.fit(train)
+            print('Featurized data example:')
+            print(forecaster.transform(train).head())
+
+            # 4.0 Get predictions on test set
+            forecasts = forecaster.forecast(test)
+            compare_data = test.assign(forecasts=forecasts).dropna()
+
+            # 5.0 Calculate accuracy metrics for the fit
+            mse = mean_squared_error(compare_data[args.target_column], compare_data['forecasts'])
+            rmse = np.sqrt(mse)
+            mae = mean_absolute_error(compare_data[args.target_column], compare_data['forecasts'])
+            actuals = compare_data[args.target_column].values
+            preds = compare_data['forecasts'].values
+            mape = np.mean(np.abs((actuals - preds) / actuals) * 100)
+
+            # 6.0 Log metrics
+            child_run.log(model_name + '_mse', mse)
+            child_run.log(model_name + '_rmse', rmse)
+            child_run.log(model_name + '_mae', mae)
+            child_run.log(model_name + '_mape', mape)
+
+            # 7.0 Train model with full dataset
+            forecaster.fit(data)
+
+            # import time
+            # time.sleep(180)
+
+            # 8.0 Save the forecasting pipeline
+            joblib.dump(forecaster, filename=os.path.join('./outputs/', model_name))
+
+            # 9.0 Register the model to the workspace
+            # Uses the values in the timeseries id columns from the first row of data to form tags for the model
+            child_run.upload_file(model_name, os.path.join('./outputs/', model_name))
+            ts_id_dict = {id_col: str(data[id_col].iloc[0]) for id_col in args.timeseries_id_columns}
+            tags_dict = {**ts_id_dict, 'ModelType': args.model_type}
+            tags_dict.update({'InputData': os.path.basename(csv_file_path)})
+            tags_dict.update({'StepRunId': current_run.id})
+            tags_dict.update({'RunId': current_run.parent.id})
+            child_run.register_model(model_path=model_name, model_name=model_name,
+                                     model_framework=args.model_type, tags=tags_dict)
+
+            child_run.complete()
+            # 10.0 Add data to output
+            end_datetime = datetime.datetime.now()
+            result.update(ts_id_dict)
+            result['model_type'] = args.model_type
+            result['file_name'] = file_name
+            result['model_name'] = model_name
+            result['start_date'] = str(start_datetime)
+            result['end_date'] = str(end_datetime)
+            result['duration'] = str(end_datetime-start_datetime)
+            result['mse'] = mse
+            result['rmse'] = rmse
+            result['mae'] = mae
+            result['mape'] = mape
+            result['index'] = idx
+            result['num_models'] = len(input_data)
+            result['status'] = child_run.get_status()
+
+            print('ending (' + csv_file_path + ') ' + str(end_datetime))
+            result_list.append(result)
+        except Exception:
+            if child_run and child_run.get_status() != 'Completed':
+                child_run.fail()
 
     # Data returned by this function will be available in parallel_run_step.txt
     return pd.DataFrame(result_list)

From c48f5dd1b7751422378e8e0b3b444e79103e2bfa Mon Sep 17 00:00:00 2001
From: Deepti Mokkapati <deeptim@microsoft.com>
Date: Tue, 9 Feb 2021 15:53:51 -0800
Subject: [PATCH 2/4] changes to customscript

---
 .../02_CustomScript_Training_Pipeline.ipynb   | 259 ++++++++++++++++--
 Custom_Script/scripts/train.py                |  25 +-
 2 files changed, 252 insertions(+), 32 deletions(-)

diff --git a/Custom_Script/02_CustomScript_Training_Pipeline.ipynb b/Custom_Script/02_CustomScript_Training_Pipeline.ipynb
index c7a6e044..1f70b4ed 100644
--- a/Custom_Script/02_CustomScript_Training_Pipeline.ipynb
+++ b/Custom_Script/02_CustomScript_Training_Pipeline.ipynb
@@ -51,7 +51,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# !pip install azureml-pipeline-steps"
+    "pip show azureml-sdk"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!pip install azureml-pipeline-steps"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!pip install --upgrade azureml-contrib-automl-pipeline-steps"
    ]
   },
   {
@@ -63,16 +81,49 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Workspace Name: deeptim-eastus2euap-ws\n",
+      "Azure Region: eastus2euap\n",
+      "Subscription Id: 381b38e9-9840-4719-a5a0-61d9585e1e91\n",
+      "Resource Group: deeptim_westus_3rg\n"
+     ]
+    }
+   ],
    "source": [
     "from azureml.core import Workspace\n",
     "\n",
     "# set up workspace\n",
-    "ws = Workspace.from_config()\n",
+    "#ws = Workspace.from_config()\n",
+    "\n",
+    "#ws = Workspace.create( subscription_id= \"381b38e9-9840-4719-a5a0-61d9585e1e91\",\n",
+    "#    resource_group= \"deeptim_westus_3rg\",\n",
+    "#   name= \"deeptim-eastus2euap-ws\",\n",
+    "#   create_resource_group=True,\n",
+    "#  location='eastus2euap',\n",
+    " #  )\n",
+    "\n",
+    "ws = Workspace( subscription_id= \"381b38e9-9840-4719-a5a0-61d9585e1e91\",\n",
+    "    resource_group= \"deeptim_westus_3rg\",\n",
+    "    workspace_name= \"deeptim-eastus2euap-ws\",\n",
+    "    )\n",
+    "\n",
+    "#ws = Workspace( subscription_id= \"381b38e9-9840-4719-a5a0-61d9585e1e91\",\n",
+    "#    resource_group= \"deeptim-eastus2euap-rg\",\n",
+    "#    workspace_name= \"deeptim-eastus2euap-ws\",\n",
+    "#    )\n",
+    "\n",
+    "#ws = Workspace( subscription_id= \"ba7979f7-d040-49c9-af1a-7414402bf622\",\n",
+    "#    resource_group= \"deeptim-westcentralus-rg\",\n",
+    "#    workspace_name= \"deeptim-westcentralus-ws\",\n",
+    "#    )\n",
     "\n",
     "# set up datastores\n",
     "dstore = ws.get_default_datastore()\n",
@@ -93,15 +144,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Experiment name: oj-customscript-test\n"
+     ]
+    }
+   ],
    "source": [
     "from azureml.core import Experiment\n",
     "\n",
-    "experiment = Experiment(ws, 'oj_training_pipeline')\n",
+    "experiment = Experiment(ws, 'oj-customscript-test')\n",
     "\n",
     "print('Experiment name: ' + experiment.name)"
    ]
@@ -121,16 +180,86 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "dataset_name = 'oj_data_small_train'"
+    "dataset_name = 'oj_data'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "blob_datastore_name = \"automl_many_models\"\n",
+    "container_name = \"automl-sample-notebook-data\"\n",
+    "account_name = \"automlsamplenotebookdata\"\n",
+    "\n",
+    "from azureml.core import Datastore\n",
+    "\n",
+    "datastore = Datastore.register_azure_blob_container(\n",
+    "    workspace=ws, \n",
+    "    datastore_name=blob_datastore_name, \n",
+    "    container_name=container_name,\n",
+    "    account_name=account_name,\n",
+    "    create_if_not_exists=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{\n",
+       "  \"source\": [\n",
+       "    \"('automl_many_models', 'oj_data_small/')\"\n",
+       "  ],\n",
+       "  \"definition\": [\n",
+       "    \"GetDatastoreFiles\"\n",
+       "  ],\n",
+       "  \"registration\": {\n",
+       "    \"id\": \"f84d587d-a7a8-4a37-a2ac-bd1cb71acb1c\",\n",
+       "    \"name\": \"oj_data_small\",\n",
+       "    \"version\": 1,\n",
+       "    \"workspace\": \"Workspace.create(name='deeptim-eastus2euap-ws', subscription_id='381b38e9-9840-4719-a5a0-61d9585e1e91', resource_group='deeptim_westus_3rg')\"\n",
+       "  }\n",
+       "}"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds_train_path = 'oj_data_small/'\n",
+    "ds_inference_path = 'oj_inference_small/'\n",
+    "\n",
+    "from azureml.core.dataset import Dataset\n",
+    "\n",
+    "\n",
+    "# Create file datasets\n",
+    "ds_train = Dataset.File.from_files(path=datastore.path(ds_train_path), validate=False)\n",
+    "ds_inference = Dataset.File.from_files(path=datastore.path(ds_inference_path), validate=False)\n",
+    "\n",
+    "# Register the file datasets\n",
+    "#dataset_name = 'oj_data_small' # if 0 < dataset_maxfiles < 11973 else 'oj_data'\n",
+    "dataset_name = 'oj_data_small'\n",
+    "train_dataset_name = dataset_name #+ '_train'\n",
+    "#inference_dataset_name = dataset_name# + '_inference'\n",
+    "ds_train.register(ws, train_dataset_name, create_new_version=True)\n",
+    "#ds_inference.register(ws, inference_dataset_name, create_new_version=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
    "metadata": {
     "scrolled": true
    },
@@ -138,7 +267,7 @@
    "source": [
     "from azureml.core.dataset import Dataset\n",
     "\n",
-    "dataset = Dataset.get_by_name(ws, name=dataset_name)\n",
+    "dataset = Dataset.get_by_name(ws, name=train_dataset_name)\n",
     "dataset_input = dataset.as_named_input(dataset_name)"
    ]
   },
@@ -155,7 +284,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {
     "scrolled": true
    },
@@ -185,24 +314,66 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
-    "cpu_cluster_name = \"cpucluster\""
+    "cpu_cluster_name = \"many-models\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found existing compute target.\n",
+      "Checking cluster status...\n",
+      "Succeeded\n",
+      "AmlCompute wait for completion finished\n",
+      "\n",
+      "Minimum number of nodes requested have been provisioned\n"
+     ]
+    }
+   ],
    "source": [
     "from azureml.core.compute import AmlCompute\n",
-    "\n",
-    "compute = AmlCompute(ws, cpu_cluster_name)"
+    "from azureml.core.compute import ComputeTarget\n",
+    "\n",
+    "# Choose a name for your cluster.\n",
+    "# TODO\n",
+    "amlcompute_cluster_name = cpu_cluster_name\n",
+    "\n",
+    "found = False\n",
+    "# Check if this compute target already exists in the workspace.\n",
+    "cts = ws.compute_targets\n",
+    "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
+    "    found = True\n",
+    "    print('Found existing compute target.')\n",
+    "    compute = cts[amlcompute_cluster_name]\n",
+    "    \n",
+    "if not found:\n",
+    "    print('Creating a new compute target...')\n",
+    "    provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D64S_V3',\n",
+    "                                                           min_nodes=0,\n",
+    "                                                           max_nodes=63,\n",
+    "                                                           admin_username='azureuser', \n",
+    "                                                           admin_user_password='Pa$$word1',\n",
+    "                                                           )\n",
+    "    # Create the cluster.\n",
+    "    compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
+    "    \n",
+    "print('Checking cluster status...')\n",
+    "# Can poll for a minimum number of nodes and for a specific timeout.\n",
+    "# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
+    "compute.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
+    "    \n",
+    "# For a more detailed view of current AmlCompute status, use get_status()."
    ]
   },
   {
@@ -228,14 +399,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
     "from azureml.pipeline.steps import ParallelRunConfig\n",
     "\n",
     "processes_per_node = 8\n",
-    "node_count = 1\n",
+    "node_count =1\n",
     "timeout = 180\n",
     "\n",
     "parallel_run_config = ParallelRunConfig(\n",
@@ -243,7 +414,7 @@
     "    entry_script='train.py',\n",
     "    mini_batch_size=\"1\",\n",
     "    run_invocation_timeout=timeout,\n",
-    "    error_threshold=10,\n",
+    "    error_threshold=-1,\n",
     "    output_action=\"append_row\",\n",
     "    environment=train_env,\n",
     "    process_count_per_node=processes_per_node,\n",
@@ -251,6 +422,15 @@
     "    node_count=node_count)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!pip show pytorch"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -264,7 +444,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {
     "scrolled": true
    },
@@ -290,7 +470,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {
     "scrolled": true
    },
@@ -323,9 +503,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Created step many-models-training [c4ffbc96][5bf31ad1-e834-4d9e-86f6-5f4e8af18725], (This step will run and generate new outputs)\n",
+      "Submitted PipelineRun be77c836-82e1-4343-a540-28063e624c35\n",
+      "Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/oj-customscript-test/runs/be77c836-82e1-4343-a540-28063e624c35?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/deeptim_westus_3rg/workspaces/deeptim-eastus2euap-ws\n"
+     ]
+    }
+   ],
    "source": [
     "from azureml.pipeline.core import Pipeline\n",
     "\n",
@@ -337,7 +527,16 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PipelineRunId: be77c836-82e1-4343-a540-28063e624c35\n",
+      "Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/oj-customscript-test/runs/be77c836-82e1-4343-a540-28063e624c35?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/deeptim_westus_3rg/workspaces/deeptim-eastus2euap-ws\n"
+     ]
+    }
+   ],
    "source": [
     "#Wait for the run to complete\n",
     "run.wait_for_completion(show_output=False, raise_on_error=True)"
@@ -575,9 +774,9 @@
    }
   ],
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python [conda env:.conda-pypi] *",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-.conda-pypi-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -589,7 +788,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.6.7"
   }
  },
  "nbformat": 4,
diff --git a/Custom_Script/scripts/train.py b/Custom_Script/scripts/train.py
index 652ed855..9deae49e 100644
--- a/Custom_Script/scripts/train.py
+++ b/Custom_Script/scripts/train.py
@@ -94,8 +94,9 @@ def run(input_data):
             # 7.0 Train model with full dataset
             forecaster.fit(data)
 
-            # import time
-            # time.sleep(180)
+            # Uncomment to simulate a 3 minute run when testing concurrency
+            # import time
+            # time.sleep(180)
 
             # 8.0 Save the forecasting pipeline
             joblib.dump(forecaster, filename=os.path.join('./outputs/', model_name))
@@ -128,12 +129,34 @@ def run(input_data):
             result['index'] = idx
             result['num_models'] = len(input_data)
             result['status'] = child_run.get_status()
+            result['run_id'] = str(child_run.id)
 
             print('ending (' + csv_file_path + ') ' + str(end_datetime))
             result_list.append(result)
         except Exception:
             if child_run and child_run.get_status() != 'Completed':
                 child_run.fail()
+            result['model_type'] = args.model_type
+            end_datetime = datetime.datetime.now()
+            result['file_name'] = file_name
+            result['model_name'] = model_name
+            result['start_date'] = str(start_datetime)
+            result['end_date'] = str(end_datetime)
+            result['duration'] = str(end_datetime-start_datetime)
+            result['mse'] = str(None)
+            result['rmse'] = str(None)
+            result['mae'] = str(None)
+            result['mape'] = str(None)
+            result['index'] = idx
+            result['num_models'] = len(input_data)
+            if child_run:
+                result['status'] = child_run.get_status()
+                result['run_id'] = str(child_run.id)
+            else:
+                result['status'] = 'Failed'
+                result['run_id'] = str(None)
+            # Keep the failed model in the step output; otherwise it silently disappears
+            result_list.append(result)
 
     # Data returned by this function will be available in parallel_run_step.txt
     return pd.DataFrame(result_list)

From f27360a87fc6ed3c98eb6c814ad9364e6c6b5f69 Mon Sep 17 00:00:00 2001
From: Deepti Mokkapati <deeptim@microsoft.com>
Date: Tue, 9 Feb 2021 15:57:00 -0800
Subject: [PATCH 3/4] changes wrt scale

---
 .../02_CustomScript_Training_Pipeline.ipynb   | 257 ++----------------
 1 file changed, 29 insertions(+), 228 deletions(-)

diff --git a/Custom_Script/02_CustomScript_Training_Pipeline.ipynb b/Custom_Script/02_CustomScript_Training_Pipeline.ipynb
index 1f70b4ed..d0534aed 100644
--- a/Custom_Script/02_CustomScript_Training_Pipeline.ipynb
+++ b/Custom_Script/02_CustomScript_Training_Pipeline.ipynb
@@ -51,25 +51,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pip show azureml-sdk"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#!pip install azureml-pipeline-steps"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#!pip install --upgrade azureml-contrib-automl-pipeline-steps"
+    "# !pip install azureml-pipeline-steps"
    ]
   },
   {
@@ -81,49 +63,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Workspace Name: deeptim-eastus2euap-ws\n",
-      "Azure Region: eastus2euap\n",
-      "Subscription Id: 381b38e9-9840-4719-a5a0-61d9585e1e91\n",
-      "Resource Group: deeptim_westus_3rg\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from azureml.core import Workspace\n",
     "\n",
     "# set up workspace\n",
-    "#ws = Workspace.from_config()\n",
-    "\n",
-    "#ws = Workspace.create( subscription_id= \"381b38e9-9840-4719-a5a0-61d9585e1e91\",\n",
-    "#    resource_group= \"deeptim_westus_3rg\",\n",
-    "#   name= \"deeptim-eastus2euap-ws\",\n",
-    "#   create_resource_group=True,\n",
-    "#  location='eastus2euap',\n",
-    " #  )\n",
-    "\n",
-    "ws = Workspace( subscription_id= \"381b38e9-9840-4719-a5a0-61d9585e1e91\",\n",
-    "    resource_group= \"deeptim_westus_3rg\",\n",
-    "    workspace_name= \"deeptim-eastus2euap-ws\",\n",
-    "    )\n",
-    "\n",
-    "#ws = Workspace( subscription_id= \"381b38e9-9840-4719-a5a0-61d9585e1e91\",\n",
-    "#    resource_group= \"deeptim-eastus2euap-rg\",\n",
-    "#    workspace_name= \"deeptim-eastus2euap-ws\",\n",
-    "#    )\n",
-    "\n",
-    "#ws = Workspace( subscription_id= \"ba7979f7-d040-49c9-af1a-7414402bf622\",\n",
-    "#    resource_group= \"deeptim-westcentralus-rg\",\n",
-    "#    workspace_name= \"deeptim-westcentralus-ws\",\n",
-    "#    )\n",
+    "ws = Workspace.from_config()\n",
     "\n",
     "# set up datastores\n",
     "dstore = ws.get_default_datastore()\n",
@@ -144,23 +93,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Experiment name: oj-customscript-test\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from azureml.core import Experiment\n",
     "\n",
-    "experiment = Experiment(ws, 'oj-customscript-test')\n",
+    "experiment = Experiment(ws, 'oj_training_pipeline')\n",
     "\n",
     "print('Experiment name: ' + experiment.name)"
    ]
@@ -180,86 +121,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset_name = 'oj_data'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "blob_datastore_name = \"automl_many_models\"\n",
-    "container_name = \"automl-sample-notebook-data\"\n",
-    "account_name = \"automlsamplenotebookdata\"\n",
-    "\n",
-    "from azureml.core import Datastore\n",
-    "\n",
-    "datastore = Datastore.register_azure_blob_container(\n",
-    "    workspace=ws, \n",
-    "    datastore_name=blob_datastore_name, \n",
-    "    container_name=container_name,\n",
-    "    account_name=account_name,\n",
-    "    create_if_not_exists=True\n",
-    ")"
+    "dataset_name = 'oj_data_small_train'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{\n",
-       "  \"source\": [\n",
-       "    \"('automl_many_models', 'oj_data_small/')\"\n",
-       "  ],\n",
-       "  \"definition\": [\n",
-       "    \"GetDatastoreFiles\"\n",
-       "  ],\n",
-       "  \"registration\": {\n",
-       "    \"id\": \"f84d587d-a7a8-4a37-a2ac-bd1cb71acb1c\",\n",
-       "    \"name\": \"oj_data_small\",\n",
-       "    \"version\": 1,\n",
-       "    \"workspace\": \"Workspace.create(name='deeptim-eastus2euap-ws', subscription_id='381b38e9-9840-4719-a5a0-61d9585e1e91', resource_group='deeptim_westus_3rg')\"\n",
-       "  }\n",
-       "}"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ds_train_path = 'oj_data_small/'\n",
-    "ds_inference_path = 'oj_inference_small/'\n",
-    "\n",
-    "from azureml.core.dataset import Dataset\n",
-    "\n",
-    "\n",
-    "# Create file datasets\n",
-    "ds_train = Dataset.File.from_files(path=datastore.path(ds_train_path), validate=False)\n",
-    "ds_inference = Dataset.File.from_files(path=datastore.path(ds_inference_path), validate=False)\n",
-    "\n",
-    "# Register the file datasets\n",
-    "#dataset_name = 'oj_data_small' # if 0 < dataset_maxfiles < 11973 else 'oj_data'\n",
-    "dataset_name = 'oj_data_small'\n",
-    "train_dataset_name = dataset_name #+ '_train'\n",
-    "#inference_dataset_name = dataset_name# + '_inference'\n",
-    "ds_train.register(ws, train_dataset_name, create_new_version=True)\n",
-    "#ds_inference.register(ws, inference_dataset_name, create_new_version=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
@@ -267,7 +138,7 @@
    "source": [
     "from azureml.core.dataset import Dataset\n",
     "\n",
-    "dataset = Dataset.get_by_name(ws, name=train_dataset_name)\n",
+    "dataset = Dataset.get_by_name(ws, name=dataset_name)\n",
     "dataset_input = dataset.as_named_input(dataset_name)"
    ]
   },
@@ -284,7 +155,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
@@ -314,66 +185,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "cpu_cluster_name = \"many-models\""
+    "cpu_cluster_name = \"cpucluster\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found existing compute target.\n",
-      "Checking cluster status...\n",
-      "Succeeded\n",
-      "AmlCompute wait for completion finished\n",
-      "\n",
-      "Minimum number of nodes requested have been provisioned\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from azureml.core.compute import AmlCompute\n",
-    "from azureml.core.compute import ComputeTarget\n",
-    "\n",
-    "# Choose a name for your cluster.\n",
-    "# TODO\n",
-    "amlcompute_cluster_name = cpu_cluster_name\n",
-    "\n",
-    "found = False\n",
-    "# Check if this compute target already exists in the workspace.\n",
-    "cts = ws.compute_targets\n",
-    "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
-    "    found = True\n",
-    "    print('Found existing compute target.')\n",
-    "    compute = cts[amlcompute_cluster_name]\n",
-    "    \n",
-    "if not found:\n",
-    "    print('Creating a new compute target...')\n",
-    "    provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D64S_V3',\n",
-    "                                                           min_nodes=0,\n",
-    "                                                           max_nodes=63,\n",
-    "                                                           admin_username='azureuser', \n",
-    "                                                           admin_user_password='Pa$$word1',\n",
-    "                                                           )\n",
-    "    # Create the cluster.\n",
-    "    compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
-    "    \n",
-    "print('Checking cluster status...')\n",
-    "# Can poll for a minimum number of nodes and for a specific timeout.\n",
-    "# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
-    "compute.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
-    "    \n",
-    "# For a more detailed view of current AmlCompute status, use get_status()."
+    "\n",
+    "compute = AmlCompute(ws, cpu_cluster_name)"
    ]
   },
   {
@@ -399,14 +228,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "from azureml.pipeline.steps import ParallelRunConfig\n",
     "\n",
     "processes_per_node = 8\n",
-    "node_count =1\n",
+    "node_count = 1\n",
     "timeout = 180\n",
     "\n",
     "parallel_run_config = ParallelRunConfig(\n",
@@ -422,15 +251,6 @@
     "    node_count=node_count)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#!pip show pytorch"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -444,7 +264,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
@@ -470,7 +290,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
@@ -503,19 +323,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Created step many-models-training [c4ffbc96][5bf31ad1-e834-4d9e-86f6-5f4e8af18725], (This step will run and generate new outputs)\n",
-      "Submitted PipelineRun be77c836-82e1-4343-a540-28063e624c35\n",
-      "Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/oj-customscript-test/runs/be77c836-82e1-4343-a540-28063e624c35?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/deeptim_westus_3rg/workspaces/deeptim-eastus2euap-ws\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from azureml.pipeline.core import Pipeline\n",
     "\n",
@@ -527,16 +337,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PipelineRunId: be77c836-82e1-4343-a540-28063e624c35\n",
-      "Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/oj-customscript-test/runs/be77c836-82e1-4343-a540-28063e624c35?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/deeptim_westus_3rg/workspaces/deeptim-eastus2euap-ws\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "#Wait for the run to complete\n",
     "run.wait_for_completion(show_output=False, raise_on_error=True)"
@@ -774,9 +575,9 @@
    }
   ],
   "kernelspec": {
-   "display_name": "Python [conda env:.conda-pypi] *",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "conda-env-.conda-pypi-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -788,7 +589,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,

From 5dc8f676f525849aab664dfb6a13f63a69c728cc Mon Sep 17 00:00:00 2001
From: Deepti Mokkapati <deeptim@microsoft.com>
Date: Thu, 11 Feb 2021 12:15:07 -0800
Subject: [PATCH 4/4] child_run

---
 Custom_Script/scripts/train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Custom_Script/scripts/train.py b/Custom_Script/scripts/train.py
index 9deae49e..cbab8f14 100644
--- a/Custom_Script/scripts/train.py
+++ b/Custom_Script/scripts/train.py
@@ -59,6 +59,7 @@ def run(input_data):
         train = data[:-args.test_size]
         test = data[-args.test_size:]
 
+        child_run = None
         try:
             child_run = current_run.child_run(name=model_name)