diff --git a/docker/Dockerfile b/docker/Dockerfile
index 88b092b9a..f31ff9e1f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -84,7 +84,7 @@ RUN cd /algorithmic-efficiency && git fetch origin
RUN cd /algorithmic-efficiency && git pull
# Todo: remove this, this is temporary for developing
-COPY scripts/startup.sh /algorithmic-efficiency/docker/scripts/startup.sh
+# COPY scripts/startup.sh /algorithmic-efficiency/docker/scripts/startup.sh
RUN chmod a+x /algorithmic-efficiency/docker/scripts/startup.sh
ENTRYPOINT ["bash", "/algorithmic-efficiency/docker/scripts/startup.sh"]
diff --git a/docs/DOCUMENTATION.md b/docs/DOCUMENTATION.md
index 15ffc7e65..b32b494c4 100644
--- a/docs/DOCUMENTATION.md
+++ b/docs/DOCUMENTATION.md
@@ -797,9 +797,9 @@ a rough guideline, the entire set of workloads was designed to have a combined
runtime of very roughly $100$ hours on the
[**benchmarking hardware**](#benchmarking-hardware).
-The eight *AlgoPerf Workloads* are:
+The nine *AlgoPerf Workloads* are:
- | **Task** | **Dataset** | **Model** | **Loss** | **Metric** | Validation
**Target** | Test
**Target** | Max
**Runtime**
*(in seconds)* | Default
**Dropout**
Value
+| | **Task** | **Dataset** | **Model** | **Loss** | **Metric** | Validation
**Target** | Test
**Target** | Max
**Runtime**
*(in seconds)* | Default
**Dropout**
Value
----- | ----------------------------- | ----------- | ----------- | -------- | ---------- | ------------------------ | ------------------ | ------------------------------------- | -------------------------------
**1** | Clickthrough rate prediction | Criteo 1TB | DLRMsmall | CE | CE (↓) | 0.123735 | 0.126041 | 8,915 | 0
**2** | MRI reconstruction | fastMRI | U-Net | L1 | SSIM (↑) | 0.723653 | 0.740633 | 2,745 | 0
diff --git a/pytorch_scoring_config_1.json b/pytorch_scoring_config_1.json
new file mode 100644
index 000000000..fcc0e86f2
--- /dev/null
+++ b/pytorch_scoring_config_1.json
@@ -0,0 +1,299 @@
+{
+ "0": {
+ "framework": "pytorch",
+ "workload": "imagenet_resnet",
+ "dataset": "imagenet",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": -1447200680,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "1": {
+ "framework": "pytorch",
+ "workload": "imagenet_resnet",
+ "dataset": "imagenet",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -1977906563,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "2": {
+ "framework": "pytorch",
+ "workload": "imagenet_resnet",
+ "dataset": "imagenet",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": 666869491,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "3": {
+ "framework": "pytorch",
+ "workload": "imagenet_vit",
+ "dataset": "imagenet",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": -796448826,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "4": {
+ "framework": "pytorch",
+ "workload": "imagenet_vit",
+ "dataset": "imagenet",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -557820510,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "5": {
+ "framework": "pytorch",
+ "workload": "imagenet_vit",
+ "dataset": "imagenet",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": -1307522002,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "6": {
+ "framework": "pytorch",
+ "workload": "fastmri",
+ "dataset": "fastmri",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": 1083014187,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "7": {
+ "framework": "pytorch",
+ "workload": "fastmri",
+ "dataset": "fastmri",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -1077277636,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "8": {
+ "framework": "pytorch",
+ "workload": "fastmri",
+ "dataset": "fastmri",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": -397959160,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "9": {
+ "framework": "pytorch",
+ "workload": "ogbg",
+ "dataset": "ogbg",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": 1662399765,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "10": {
+ "framework": "pytorch",
+ "workload": "ogbg",
+ "dataset": "ogbg",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": 486196682,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "11": {
+ "framework": "pytorch",
+ "workload": "ogbg",
+ "dataset": "ogbg",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": 1039483369,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "12": {
+ "framework": "pytorch",
+ "workload": "wmt",
+ "dataset": "wmt",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": -811149048,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "13": {
+ "framework": "pytorch",
+ "workload": "wmt",
+ "dataset": "wmt",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -1485236731,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "14": {
+ "framework": "pytorch",
+ "workload": "wmt",
+ "dataset": "wmt",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": -439753961,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "15": {
+ "framework": "pytorch",
+ "workload": "librispeech_deepspeech",
+ "dataset": "librispeech",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": -1459326687,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "16": {
+ "framework": "pytorch",
+ "workload": "librispeech_deepspeech",
+ "dataset": "librispeech",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": 1889675898,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "17": {
+ "framework": "pytorch",
+ "workload": "librispeech_deepspeech",
+ "dataset": "librispeech",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": -1297403039,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "18": {
+ "framework": "pytorch",
+ "workload": "criteo1tb",
+ "dataset": "criteo1tb",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": -1790695410,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "19": {
+ "framework": "pytorch",
+ "workload": "criteo1tb",
+ "dataset": "criteo1tb",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -816806699,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "20": {
+ "framework": "pytorch",
+ "workload": "criteo1tb",
+ "dataset": "criteo1tb",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": 1704852417,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "21": {
+ "framework": "pytorch",
+ "workload": "librispeech_conformer",
+ "dataset": "librispeech",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": 1605670948,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "22": {
+ "framework": "pytorch",
+ "workload": "librispeech_conformer",
+ "dataset": "librispeech",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -1323816683,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "23": {
+ "framework": "pytorch",
+ "workload": "librispeech_conformer",
+ "dataset": "librispeech",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": -1881486829,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "24": {
+ "framework": "pytorch",
+ "workload": "finewebedu_lm",
+ "dataset": "fineweb_edu_10B",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_0",
+ "rng_seed": -304430747,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "25": {
+ "framework": "pytorch",
+ "workload": "finewebedu_lm",
+ "dataset": "fineweb_edu_10B",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_1",
+ "rng_seed": -912336586,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ },
+ "26": {
+ "framework": "pytorch",
+ "workload": "finewebedu_lm",
+ "dataset": "fineweb_edu_10B",
+ "submission_path": "submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2/submission.py",
+ "experiment_dir": "submissions_a100/schedule_free_adamw_v2/study_2",
+ "rng_seed": 1970089239,
+ "tuning_ruleset": "self",
+ "num_tuning_trials": 1,
+ "max_global_steps": 10
+ }
+}
\ No newline at end of file
diff --git a/run_pytorch_scoring.sh b/run_pytorch_scoring.sh
new file mode 100644
index 000000000..4f0794416
--- /dev/null
+++ b/run_pytorch_scoring.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+#SBATCH --nodes=1 # give it a full node
+#SBATCH --ntasks-per-node=1
+#SBATCH --array=0-26
+#SBATCH --partition=a100
+#SBATCH --gpus-per-node=4
+#SBATCH --exclusive #this will not allow other jobs to run on this cluster
+#SBATCH --output=experiments/tests/updated_schedule_free/job_%A_%a.out
+#SBATCH --error=experiments/tests/updated_schedule_free/job_%A_%a.err
+
+# Usage: sbatch run_pytorch_scoring.sh
+# This script reads config.json and launches a sbatch job using task
+# arrays where each job in the array corresponds to a training run
+# for a workload given a random seed and tuning trial index.
+# To generate the config.json use make_job_config.py.
+
+set -x
+
+# Pull docker image (ATTENTION: you may want to modify this)
+REPO="europe-west4-docker.pkg.dev"
+IMAGE="europe-west4-docker.pkg.dev/mlcommons-algoperf/algoperf-docker-repo/algoperf_pytorch_main:latest"
+yes | gcloud auth configure-docker "$REPO"
+docker pull "$IMAGE"
+# Job config (ATTENTION: you may want to modify this)
+config_file="$HOME/algorithmic-efficiency/pytorch_scoring_config_1.json" # Replace with your config file path
+LOGS_BUCKET="algoperf-runs" # replace with your bucket used for logging
+
+
+# Function to read a JSON file and extract a value by key
+read_json_value() {
+  local json_file="$1"
+  local index="$2"
+  local key="$3"
+  local value=$(jq -r ".[\"$index\"].$key" "$json_file")
+  echo "$value"
+}
+
+# Check if jq is installed
+if ! command -v jq &> /dev/null
+then
+  echo "jq could not be found. Please install it."
+  exit 1
+fi
+
+TASK="$SLURM_ARRAY_TASK_ID"
+FRAMEWORK=$(read_json_value "$config_file" "$TASK" "framework")
+DATASET=$(read_json_value "$config_file" "$TASK" "dataset")
+SUBMISSION_PATH=$(read_json_value "$config_file" "$TASK" "submission_path")
+# NOTE(review): keys absent from the config are read as the literal string "null".
+TUNING_SEARCH_SPACE=$(read_json_value "$config_file" "$TASK" "tuning_search_space")
+EXPERIMENT_DIR=$(read_json_value "$config_file" "$TASK" "experiment_dir")
+# (the config has no "max_steps" key; MAX_GLOBAL_STEPS below is the value used)
+RNG_SEED=$(read_json_value "$config_file" "$TASK" "rng_seed")
+WORKLOAD=$(read_json_value "$config_file" "$TASK" "workload")
+HPARAM_START_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_start_index")
+HPARAM_END_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_end_index")
+NUM_TUNING_TRIALS=$(read_json_value "$config_file" "$TASK" "num_tuning_trials")
+TUNING_RULESET=$(read_json_value "$config_file" "$TASK" "tuning_ruleset")
+MAX_GLOBAL_STEPS=$(read_json_value "$config_file" "$TASK" "max_global_steps")
+
+docker run \
+  -v /opt/data/:/data/ \
+  -v "$HOME/experiment_runs:/experiment_runs" \
+  -v "$HOME/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms" \
+  -v "$HOME/algorithmic-efficiency/docker/scripts/startup.sh:/algorithmic-efficiency/docker/scripts/startup.sh" \
+  --gpus all \
+  --ipc=host \
+  "$IMAGE" \
+  -d "$DATASET" \
+  -f "$FRAMEWORK" \
+  -s "$SUBMISSION_PATH" \
+  -w "$WORKLOAD" \
+  -t "$TUNING_SEARCH_SPACE" \
+  -e "$EXPERIMENT_DIR" \
+  -c False \
+  -o True \
+  --rng_seed "$RNG_SEED" \
+  --hparam_start_index "$HPARAM_START_INDEX" \
+  --hparam_end_index "$HPARAM_END_INDEX" \
+  --num_tuning_trials "$NUM_TUNING_TRIALS" \
+  --tuning_ruleset "$TUNING_RULESET" \
+  -i true \
+  -r false \
+  --logs_bucket "$LOGS_BUCKET" \
+  -m "$MAX_GLOBAL_STEPS"
+
diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py
index 043a65791..eecb9799e 100644
--- a/scoring/performance_profile.py
+++ b/scoring/performance_profile.py
@@ -59,7 +59,7 @@
# workloads and rules for the scoring to be correct.
# We do not use the workload registry since it contains test and development
# workloads as well.
-NUM_BASE_WORKLOADS = 8
+NUM_BASE_WORKLOADS = 9
NUM_VARIANT_WORKLOADS = 0
NUM_TRIALS = 5
NUM_STUDIES = 3
diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py
index efe276a33..10e624148 100644
--- a/scoring/score_submissions.py
+++ b/scoring/score_submissions.py
@@ -75,10 +75,10 @@
FLAGS = flags.FLAGS
-def get_summary_df(workload, workload_df, include_test_split=False):
+def get_summary_df(workload, workload_df):
print(f' WORKLOAD: {workload}')
validation_metric, validation_target = (
- scoring_utils.get_workload_metrics_and_targets(workload, split='validation')
+ scoring_utils.get_workload_metrics_and_targets(workload)
)
is_minimized = performance_profile.check_if_minimized(validation_metric)
@@ -127,7 +127,7 @@ def get_summary_df(workload, workload_df, include_test_split=False):
# compute the step times
def delta(series):
- return series.shift(1, fill_value=0) - series
+ return series.apply(lambda x: np.diff(x, prepend=0))
accumulated_time_intervals = delta(workload_df['accumulated_submission_time'])
step_intervals = delta(workload_df['global_step'])
@@ -136,47 +136,19 @@ def delta(series):
f'WARNING: The number of evals may be too low to calculate reliable step time for {workload}'
)
- summary_df['step_time (s)'] = np.median(
- (accumulated_time_intervals / step_intervals).iloc[0]
- )
-
- summary_df['step_hint'] = scoring_utils.get_workload_stephint(workload)
-
- # test metrics
- if include_test_split:
- test_metric, test_target = scoring_utils.get_workload_metrics_and_targets(
- workload, split='test'
+ # Flatten all intervals from all trials and take the global median
+ with np.errstate(divide='ignore', invalid='ignore'):
+ all_ratios = np.concatenate(
+ (accumulated_time_intervals / step_intervals).values
)
+ summary_df['step_time (s)'] = np.nanmedian(all_ratios)
- summary_df['test target metric name'] = test_metric
- summary_df['test target metric value'] = test_target
-
- summary_df['test target reached'] = (
- workload_df[test_metric]
- .apply(lambda x: target_op(x, test_target))
- .apply(np.any)
- )
- summary_df['best metric value on test'] = workload_df[test_metric].apply(
- lambda x: best_op(x)
- )
- workload_df['index best eval on test'] = workload_df[test_metric].apply(
- lambda x: idx_op(x)
- )
- summary_df['time to best eval on test (s)'] = workload_df.apply(
- lambda x: x['accumulated_submission_time'][x['index best eval on test']],
- axis=1,
- )
- summary_df['time to target on test (s)'] = summary_df.apply(
- lambda x: x['time to best eval on test (s)']
- if x['test target reached']
- else np.inf,
- axis=1,
- )
+ summary_df['step_hint'] = scoring_utils.get_workload_stephint(workload)
return summary_df
-def get_submission_summary(df, include_test_split=False):
+def get_submission_summary(df):
"""Summarizes the submission results into metric and time tables
organized by workload.
"""
@@ -184,9 +156,7 @@ def get_submission_summary(df, include_test_split=False):
dfs = []
print(df)
for workload, group in df.groupby('workload'):
- summary_df = get_summary_df(
- workload, group, include_test_split=include_test_split
- )
+ summary_df = get_summary_df(workload, group)
dfs.append(summary_df)
df = pd.concat(dfs)
diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py
index cb63eab4b..c2ab8aeec 100644
--- a/scoring/scoring_utils.py
+++ b/scoring/scoring_utils.py
@@ -214,7 +214,7 @@ def get_experiment_df(experiment_dir):
## Get workload properties
-def get_workload_metrics_and_targets(workload, split='validation'):
+def get_workload_metrics_and_targets(workload):
"""Returns workload target metric name and value."""
workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1)
framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2)
@@ -233,12 +233,8 @@ def get_workload_metrics_and_targets(workload, split='validation'):
workload_init_kwargs=workload_init_kwargs,
)
metric_name = workload_obj.target_metric_name
- if split == 'validation':
- metric = f'validation/{metric_name}'
- target = workload_obj.validation_target_value
- elif split == 'test':
- metric = f'test/{metric_name}'
- target = workload_obj.test_target_value
+ metric = f'validation/{metric_name}'
+ target = workload_obj.validation_target_value
return metric, target
diff --git a/scoring/utils/slurm/make_job_config.py b/scoring/utils/slurm/make_job_config.py
index 39f6f5eb0..91afc9196 100644
--- a/scoring/utils/slurm/make_job_config.py
+++ b/scoring/utils/slurm/make_job_config.py
@@ -9,6 +9,7 @@
import json
import os
+import struct
import jax
from absl import app, flags
@@ -17,8 +18,6 @@
TUNING_SEARCH_SPACE = (
'reference_algorithms/paper_baselines/adamw/tuning_search_space.json'
)
-NUM_TUNING_TRIALS = 3 # For external tuning ruleset
-NUM_STUDIES = 3
flags.DEFINE_string(
'submission_path',
@@ -35,11 +34,6 @@
'experiments',
'Path to experiment dir where logs will be saved.',
)
-flags.DEFINE_string(
- 'experiment_dir',
- 'experiments/',
- 'Path to experiment dir where logs will be saved.',
-)
flags.DEFINE_enum(
'framework',
'jax',
@@ -56,14 +50,13 @@
flags.DEFINE_string(
'workloads', None, help='Comma seperated list of workloads to run.'
)
-flags.DEFINE_integer('num_studies', NUM_STUDIES, help='Number of studies.')
+flags.DEFINE_integer('num_studies', None, help='Number of studies.')
+flags.DEFINE_integer('num_tuning_trials', None, help='Number of tuning trials.')
FLAGS = flags.FLAGS
MIN_INT = -(2 ** (31))
MAX_INT = 2 ** (31) - 1
-NUM_TUNING_TRIALS = 5 # For external tuning ruleset
-NUM_STUDIES = 3
WORKLOADS = {
'imagenet_resnet': {'dataset': 'imagenet'},
@@ -74,6 +67,12 @@
'librispeech_deepspeech': {'dataset': 'librispeech'},
'criteo1tb': {'dataset': 'criteo1tb'},
'librispeech_conformer': {'dataset': 'librispeech'},
+ 'finewebedu_lm': {'dataset': 'fineweb_edu_10B'}
+}
+
+RULESET_CONFIGS = {
+ 'self': {'num_studies': 3, 'num_tuning_trials': 1},
+ 'external': {'num_studies': 3, 'num_tuning_trials': 5},
}
@@ -83,17 +82,29 @@ def main(_):
else:
workloads = FLAGS.workloads.split(',')
- key = jax.random.key(FLAGS.seed)
+ if not FLAGS.seed:
+ FLAGS.seed = struct.unpack('I', os.urandom(4))[0]
+
+ # Set defaults based on tuning_ruleset if not provided by user
+ num_studies = FLAGS.num_studies
+ if num_studies is None:
+ num_studies = RULESET_CONFIGS[FLAGS.tuning_ruleset]['num_studies']
+
+ num_tuning_trials = FLAGS.num_tuning_trials
+ if num_tuning_trials is None:
+ num_tuning_trials = RULESET_CONFIGS[FLAGS.tuning_ruleset]['num_tuning_trials']
+
+ key = jax.random.PRNGKey(FLAGS.seed)
jobs = []
for workload in workloads:
# Fold in hash(workload) mod(max(uint32))
workload_key = jax.random.fold_in(key, hash(workload) % (2**32 - 1))
- for study_index in range(NUM_STUDIES):
+ for study_index in range(num_studies):
study_key = jax.random.fold_in(workload_key, study_index)
if FLAGS.tuning_ruleset == 'external':
- for hparam_index in range(NUM_TUNING_TRIALS):
+ for hparam_index in range(num_tuning_trials):
run_key = jax.random.fold_in(study_key, hparam_index)
seed = jax.random.randint(run_key, (1,), MIN_INT, MAX_INT)[0].item()
print(seed)
@@ -107,7 +118,7 @@ def main(_):
job['experiment_dir'] = study_dir
job['rng_seed'] = seed
job['tuning_ruleset'] = FLAGS.tuning_ruleset
- job['num_tuning_trials'] = NUM_TUNING_TRIALS
+ job['num_tuning_trials'] = num_tuning_trials
job['hparam_start_index'] = hparam_index
job['hparam_end_index'] = hparam_index + 1
job['tuning_search_space'] = FLAGS.tuning_search_space
diff --git a/scoring/utils/slurm/run_jobs.sh b/scoring/utils/slurm/run_jobs.sh
index 5fcf8f69e..5232387be 100644
--- a/scoring/utils/slurm/run_jobs.sh
+++ b/scoring/utils/slurm/run_jobs.sh
@@ -2,31 +2,21 @@
#SBATCH --nodes=1 # give it a full node
#SBATCH --ntasks-per-node=1
-#SBATCH --array=
-#SBATCH --partition=v100
-#SBATCH --gpus-per-node=8
+#SBATCH --array=0-26
+#SBATCH --partition=a100
+#SBATCH --gpus-per-node=4
#SBATCH --exclusive #this will not allow other jobs to run on this cluster
-#SBATCH --output=experiments/tests/jit_debug_deepspeech_old_stephint_nadamw/job_%A_%a.out
-#SBATCH --error=experiments/tests/jit_debug_deepspeech_old_stephint_nadamw/job_%A_%a.err
+#SBATCH --output=experiments/tests/updated_schedule_free/job_%A_%a.out
+#SBATCH --error=experiments/tests/updated_schedule_free/job_%A_%a.err
-# Usage: sbatch .sh
+# Usage: sbatch run_jobs.sh [options]
# This script reads config.json and launches a sbatch job using task
-# arrays where each job in the array corresponds to a training run
+# arrays where each job in the array corresponds to a training run
# for a workload given a random seed and tuning trial index.
# To generate the config.json use make_job_config.py.
set -x
-# Pull docker image (ATTENTION: you may want to modify this)
-REPO=""
-IMAGE=""
-y | gcloud auth configure-docker $REPO
-docker pull $IMAGE
-# Job config (ATTENTION: you may want to modify this)
-config_file="" # Replace with your config file path
-LOGS_BUCKET="" # replace with your bucket used for logging
-
-
# Function to read a JSON file and extract a value by key
read_json_value() {
local json_file="$1"
@@ -43,41 +33,137 @@ then
exit 1
fi
-TASK="$SLURM_ARRAY_TASK_ID"
-FRAMEWORK=$(read_json_value "$config_file" "$TASK" "framework")
-DATASET=$(read_json_value "$config_file" "$TASK" "dataset")
-SUBMISSION_PATH=$(read_json_value "$config_file" "$TASK" "submission_path")
-FRAMEWORK=$(read_json_value "$config_file" "$TASK" "framework")
-TUNING_SEARCH_SPACE=$(read_json_value "$config_file" "$TASK" "tuning_search_space")
-EXPERIMENT_DIR=$(read_json_value "$config_file" "$TASK" "experiment_dir")
-MAX_STEPS=$(read_json_value "$config_file" "$TASK" "max_steps")
-RNG_SEED=$(read_json_value "$config_file" "$TASK" "rng_seed")
-WORKLOAD=$(read_json_value "$config_file" "$TASK" "workload")
-HPARAM_START_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_start_index")
-HPARAM_END_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_end_index")
-NUM_TUNING_TRIALS=$(read_json_value "$config_file" "$TASK" "num_tuning_trials")
-TUNING_RULESET=$(read_json_value "$config_file" "$TASK" "tuning_ruleset")
-MAX_GLOBAL_STEPS=$(read_json_value "$config_file" "$MAX_GLOBAL_STEPS" "max_global_steps")
+# Default values
+REPO="europe-west4-docker.pkg.dev"
+IMAGE="europe-west4-docker.pkg.dev/mlcommons-algoperf/algoperf-docker-repo/algoperf_pytorch_main:latest"
+CONFIG_FILE="$HOME/algorithmic-efficiency/pytorch_scoring_config_1.json"
+LOGS_BUCKET="algoperf-runs"
+TASK_ID="${SLURM_ARRAY_TASK_ID:-0}"
+
+# Parse flags
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --repo)
+ REPO="$2"
+ shift 2
+ ;;
+ --image)
+ IMAGE="$2"
+ shift 2
+ ;;
+ --config_file)
+ CONFIG_FILE="$2"
+ shift 2
+ ;;
+ --logs_bucket)
+ LOGS_BUCKET="$2"
+ shift 2
+ ;;
+ --task_id)
+ TASK_ID="$2"
+ shift 2
+ ;;
+ --framework)
+ FRAMEWORK="$2"
+ shift 2
+ ;;
+ --dataset)
+ DATASET="$2"
+ shift 2
+ ;;
+ --submission_path)
+ SUBMISSION_PATH="$2"
+ shift 2
+ ;;
+ --tuning_search_space)
+ TUNING_SEARCH_SPACE="$2"
+ shift 2
+ ;;
+ --experiment_dir)
+ EXPERIMENT_DIR="$2"
+ shift 2
+ ;;
+ --rng_seed)
+ RNG_SEED="$2"
+ shift 2
+ ;;
+ --workload)
+ WORKLOAD="$2"
+ shift 2
+ ;;
+ --hparam_start_index)
+ HPARAM_START_INDEX="$2"
+ shift 2
+ ;;
+ --hparam_end_index)
+ HPARAM_END_INDEX="$2"
+ shift 2
+ ;;
+ --num_tuning_trials)
+ NUM_TUNING_TRIALS="$2"
+ shift 2
+ ;;
+ --tuning_ruleset)
+ TUNING_RULESET="$2"
+ shift 2
+ ;;
+ --max_global_steps)
+ MAX_GLOBAL_STEPS="$2"
+ shift 2
+ ;;
+ *)
+ echo "Unknown option $1"
+ exit 1
+ ;;
+ esac
+done
+
+# Pull docker image
+yes | gcloud auth configure-docker "$REPO"
+docker pull "$IMAGE"
+
+# Set variables from config file if not already set by flags
+FRAMEWORK="${FRAMEWORK:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "framework")}"
+DATASET="${DATASET:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "dataset")}"
+SUBMISSION_PATH="${SUBMISSION_PATH:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "submission_path")}"
+TUNING_SEARCH_SPACE="${TUNING_SEARCH_SPACE:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "tuning_search_space")}"
+EXPERIMENT_DIR="${EXPERIMENT_DIR:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "experiment_dir")}"
+RNG_SEED="${RNG_SEED:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "rng_seed")}"
+WORKLOAD="${WORKLOAD:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "workload")}"
+HPARAM_START_INDEX="${HPARAM_START_INDEX:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "hparam_start_index")}"
+HPARAM_END_INDEX="${HPARAM_END_INDEX:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "hparam_end_index")}"
+NUM_TUNING_TRIALS="${NUM_TUNING_TRIALS:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "num_tuning_trials")}"
+TUNING_RULESET="${TUNING_RULESET:-$(read_json_value "$CONFIG_FILE" "$TASK_ID" "tuning_ruleset")}"
+
+DOCKER_CMD=(
+ docker run
+ -v /opt/data/:/data/
+ -v "$HOME/experiment_runs:/experiment_runs"
+ -v "$HOME/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms"
+ -v "$HOME/algorithmic-efficiency/docker/scripts/startup.sh:/algorithmic-efficiency/docker/scripts/startup.sh"
+ --gpus all
+ --ipc=host
+ "$IMAGE"
+ -d "$DATASET"
+ -f "$FRAMEWORK"
+ -s "$SUBMISSION_PATH"
+ -w "$WORKLOAD"
+ -t "$TUNING_SEARCH_SPACE"
+ -e "$EXPERIMENT_DIR"
+ -c False
+ -o True
+ --rng_seed "$RNG_SEED"
+ --hparam_start_index "$HPARAM_START_INDEX"
+ --hparam_end_index "$HPARAM_END_INDEX"
+ --num_tuning_trials "$NUM_TUNING_TRIALS"
+ --tuning_ruleset "$TUNING_RULESET"
+ -i true
+ -r false
+ --logs_bucket "$LOGS_BUCKET"
+)
+
+if [ -n "$MAX_GLOBAL_STEPS" ]; then
+ DOCKER_CMD+=(-m "$MAX_GLOBAL_STEPS")
+fi
-docker run \
- -v /opt/data/:/data/ \
- -v $HOME/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms \
- --gpus all \
- --ipc=host \
- $IMAGE \
- -d $DATASET \
- -f $FRAMEWORK \
- -s $SUBMISSION_PATH \
- -w $WORKLOAD \
- -t $TUNING_SEARCH_SPACE \
- -e $EXPERIMENT_DIR \
- -c False \
- -o True \
- --rng_seed $RNG_SEED \
- --hparam_start_index $HPARAM_START_INDEX \
- --hparam_end_index $HPARAM_END_INDEX \
- --num_tuning_trials $NUM_TUNING_TRIALS \
- --tuning_ruleset $TUNING_RULESET \
- --logs_bucket $LOGS_BUCKET \
- -i true \
- -r false
\ No newline at end of file
+"${DOCKER_CMD[@]}"
diff --git a/scoring/utils/slurm/run_submission.sh b/scoring/utils/slurm/run_submission.sh
new file mode 100644
index 000000000..ddef9f586
--- /dev/null
+++ b/scoring/utils/slurm/run_submission.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+
+# Usage:
+# ./algorithmic-efficiency/scoring/utils/slurm/run_submission.sh \
+# --submission_path submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2
+#
+# Note: --dry_run is true by default (sets MAX_GLOBAL_STEPS=10).
+# To perform a full run, explicitly set --dry_run false.
+
+set -e
+set -x
+
+# --- Global Variables ---
+SUBMISSION_PATH=""
+DRY_RUN=true
+MAX_GLOBAL_STEPS=10
+SUBMISSION_NAME=""
+RULESET=""
+FRAMEWORK=""
+ARRAY_RANGE=""
+
+# --- Helper Functions ---
+
+install_yq() {
+ if ! command -v yq &> /dev/null; then
+ echo "yq not found. Attempting to install locally to $HOME/.local/bin..."
+ mkdir -p "$HOME/.local/bin"
+ local OS=$(uname | tr '[:upper:]' '[:lower:]')
+ local ARCH=$(uname -m)
+ case "$ARCH" in
+ x86_64) ARCH="amd64" ;;
+ aarch64) ARCH="arm64" ;;
+ esac
+
+ local YQ_URL="https://github.com/mikefarah/yq/releases/latest/download/yq_${OS}_${ARCH}"
+ if command -v curl &> /dev/null; then
+ curl -L "$YQ_URL" -o "$HOME/.local/bin/yq"
+ elif command -v wget &> /dev/null; then
+ wget "$YQ_URL" -O "$HOME/.local/bin/yq"
+ else
+ echo "Error: Neither curl nor wget found. Please install yq manually: https://github.com/mikefarah/yq"
+ exit 1
+ fi
+ chmod +x "$HOME/.local/bin/yq"
+ export PATH="$HOME/.local/bin:$PATH"
+ echo "yq installed successfully to $HOME/.local/bin"
+ fi
+}
+
+check_command() {
+ if ! command -v "$1" &> /dev/null; then
+ echo "Error: $1 could not be found. Please install it."
+ exit 1
+ fi
+}
+
+verify_environment() {
+ if [[ "$PWD" != "$HOME" ]]; then
+ echo "Error: This script must be run from your home directory ($HOME)."
+ echo "Expected directory structure:"
+ echo " $HOME/"
+ echo " ├── algorithmic-efficiency/"
+ echo " └── submissions_algorithms/"
+ exit 1
+ fi
+
+ if [[ ! -d "algorithmic-efficiency" || ! -d "submissions_algorithms" ]]; then
+ echo "Error: Required repositories not found in the current directory."
+ echo "Please ensure both 'algorithmic-efficiency' and 'submissions_algorithms' are present in $HOME."
+ exit 1
+ fi
+
+ install_yq
+ check_command "jq"
+}
+
+parse_flags() {
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --submission_path)
+ SUBMISSION_PATH="$2"
+ shift 2
+ ;;
+ --dry_run)
+ DRY_RUN="$2"
+ shift 2
+ ;;
+ *)
+ echo "Unknown option $1"
+ exit 1
+ ;;
+ esac
+ done
+
+ if [ -z "$SUBMISSION_PATH" ]; then
+ echo "Error: --submission_path is required."
+ exit 1
+ fi
+
+ if [ "$DRY_RUN" = false ]; then
+ MAX_GLOBAL_STEPS=""
+ fi
+}
+
+extract_submission_info() {
+ SUBMISSION_NAME=$(basename "$SUBMISSION_PATH")
+ local info_file="$SUBMISSION_PATH/submission_info.yml"
+
+ if [ ! -f "$info_file" ]; then
+ echo "Error: $info_file not found."
+ exit 1
+ fi
+
+ local raw_ruleset=$(yq eval '.ruleset' "$info_file" | tr '[:upper:]' '[:lower:]')
+ FRAMEWORK=$(yq eval '.framework' "$info_file" | tr '[:upper:]' '[:lower:]')
+
+ # Parse ruleset by checking for substrings "self" or "external"
+ if [[ "$raw_ruleset" == *"self"* ]]; then
+ RULESET="self"
+ elif [[ "$raw_ruleset" == *"external"* ]]; then
+ RULESET="external"
+ else
+ echo "Error: Expected 'ruleset' in $info_file to contain 'self' or 'external' (got '$raw_ruleset')."
+ exit 1
+ fi
+
+ # Verify framework
+ if [[ "$FRAMEWORK" != "jax" && "$FRAMEWORK" != "pytorch" ]]; then
+ echo "Error: 'framework' in $info_file must be either 'jax' or 'pytorch' (got '$FRAMEWORK')."
+ exit 1
+ fi
+
+ echo "Submission Name: $SUBMISSION_NAME"
+ echo "Ruleset: $RULESET"
+ echo "Framework: $FRAMEWORK"
+ echo "Dry Run: $DRY_RUN"
+ echo "Max Global Steps: $MAX_GLOBAL_STEPS"
+}
+
+generate_config() {
+ local exp_prefix="submissions_a100_dry_run"
+ if [ "$DRY_RUN" = false ]; then
+ exp_prefix="submissions_a100"
+ fi
+
+ docker run \
+ --rm \
+ -v "$(pwd)":/algorithmic-efficiency \
+ -w /algorithmic-efficiency \
+ --entrypoint python \
+ "europe-west4-docker.pkg.dev/mlcommons-algoperf/algoperf-docker-repo/algoperf_${FRAMEWORK}_main:latest" \
+ algorithmic-efficiency/scoring/utils/slurm/make_job_config.py \
+ --framework="$FRAMEWORK" \
+ --tuning_ruleset="$RULESET" \
+ --submission_path="$SUBMISSION_PATH/submission.py" \
+ --experiment_dir="${exp_prefix}/$SUBMISSION_NAME"
+
+ mv config.json "$SUBMISSION_NAME.json"
+}
+
+prepare_sbatch_array() {
+ local num_jobs=$(jq 'length' "$SUBMISSION_NAME.json")
+ if [[ "$num_jobs" -eq 0 ]]; then
+ echo "Error: No jobs found in $SUBMISSION_NAME.json."
+ exit 1
+ fi
+
+ ARRAY_RANGE="0-$((num_jobs - 1))"
+ echo "Number of jobs: $num_jobs"
+ echo "Sbatch array range: $ARRAY_RANGE"
+
+ mkdir -p "experiments/tests/$SUBMISSION_NAME"
+}
+
+run_sbatch() {
+ local sbatch_cmd=(
+ sbatch
+ --array="$ARRAY_RANGE"
+ --output="experiments/tests/$SUBMISSION_NAME/job_%A_%a.out"
+ --error="experiments/tests/$SUBMISSION_NAME/job_%A_%a.err"
+ "algorithmic-efficiency/scoring/utils/slurm/run_jobs.sh"
+ --config_file "$(pwd)/$SUBMISSION_NAME.json"
+ --image "europe-west4-docker.pkg.dev/mlcommons-algoperf/algoperf-docker-repo/algoperf_${FRAMEWORK}_main:latest"
+ )
+
+ if [ -n "$MAX_GLOBAL_STEPS" ]; then
+ sbatch_cmd+=(--max_global_steps "$MAX_GLOBAL_STEPS")
+ fi
+
+ "${sbatch_cmd[@]}"
+}
+
+# --- Main ---
+
+main() {
+ verify_environment
+ parse_flags "$@"
+ extract_submission_info
+ generate_config
+ prepare_sbatch_array
+ run_sbatch
+}
+
+main "$@"