Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ RUN if [ "$framework" = "jax" ] ; then \
RUN cd /algorithmic-efficiency && git fetch origin
RUN cd /algorithmic-efficiency && git pull

# Todo: remove this, this is temporary for developing
COPY scripts/startup.sh /algorithmic-efficiency/docker/scripts/startup.sh
# Uncomment this for developing purposes
# COPY scripts/startup.sh /algorithmic-efficiency/docker/scripts/startup.sh
RUN chmod a+x /algorithmic-efficiency/docker/scripts/startup.sh

ENTRYPOINT ["bash", "/algorithmic-efficiency/docker/scripts/startup.sh"]
2 changes: 1 addition & 1 deletion scoring/performance_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
# workloads and rules for the scoring to be correct.
# We do not use the workload registry since it contains test and development
# workloads as well.
NUM_BASE_WORKLOADS = 8
NUM_BASE_WORKLOADS = 9
NUM_VARIANT_WORKLOADS = 0
NUM_TRIALS = 5
NUM_STUDIES = 3
Expand Down
52 changes: 11 additions & 41 deletions scoring/score_submissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@
FLAGS = flags.FLAGS


def get_summary_df(workload, workload_df, include_test_split=False):
def get_summary_df(workload, workload_df):
print(f' WORKLOAD: {workload}')
validation_metric, validation_target = (
scoring_utils.get_workload_metrics_and_targets(workload, split='validation')
scoring_utils.get_workload_metrics_and_targets(workload)
)

is_minimized = performance_profile.check_if_minimized(validation_metric)
Expand Down Expand Up @@ -127,7 +127,7 @@ def get_summary_df(workload, workload_df, include_test_split=False):

# compute the step times
def delta(series):
return series.shift(1, fill_value=0) - series
return series.apply(lambda x: np.diff(x, prepend=0))

accumulated_time_intervals = delta(workload_df['accumulated_submission_time'])
step_intervals = delta(workload_df['global_step'])
Expand All @@ -136,57 +136,27 @@ def delta(series):
f'WARNING: The number of evals may be too low to calculate reliable step time for {workload}'
)

summary_df['step_time (s)'] = np.median(
(accumulated_time_intervals / step_intervals).iloc[0]
)

summary_df['step_hint'] = scoring_utils.get_workload_stephint(workload)

# test metrics
if include_test_split:
test_metric, test_target = scoring_utils.get_workload_metrics_and_targets(
workload, split='test'
# Flatten all intervals from all trials and take the global median
with np.errstate(divide='ignore', invalid='ignore'):
all_ratios = np.concatenate(
(accumulated_time_intervals / step_intervals).values
)
summary_df['step_time (s)'] = np.nanmedian(all_ratios)

summary_df['test target metric name'] = test_metric
summary_df['test target metric value'] = test_target

summary_df['test target reached'] = (
workload_df[test_metric]
.apply(lambda x: target_op(x, test_target))
.apply(np.any)
)
summary_df['best metric value on test'] = workload_df[test_metric].apply(
lambda x: best_op(x)
)
workload_df['index best eval on test'] = workload_df[test_metric].apply(
lambda x: idx_op(x)
)
summary_df['time to best eval on test (s)'] = workload_df.apply(
lambda x: x['accumulated_submission_time'][x['index best eval on test']],
axis=1,
)
summary_df['time to target on test (s)'] = summary_df.apply(
lambda x: x['time to best eval on test (s)']
if x['test target reached']
else np.inf,
axis=1,
)
summary_df['step_hint'] = scoring_utils.get_workload_stephint(workload)

return summary_df


def get_submission_summary(df, include_test_split=False):
def get_submission_summary(df):
"""Summarizes the submission results into metric and time tables
organized by workload.
"""

dfs = []
print(df)
for workload, group in df.groupby('workload'):
summary_df = get_summary_df(
workload, group, include_test_split=include_test_split
)
summary_df = get_summary_df(workload, group)
dfs.append(summary_df)

df = pd.concat(dfs)
Expand Down
10 changes: 3 additions & 7 deletions scoring/scoring_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def get_experiment_df(experiment_dir):


## Get workload properties
def get_workload_metrics_and_targets(workload, split='validation'):
def get_workload_metrics_and_targets(workload):
"""Returns workload target metric name and value."""
workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1)
framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2)
Expand All @@ -233,12 +233,8 @@ def get_workload_metrics_and_targets(workload, split='validation'):
workload_init_kwargs=workload_init_kwargs,
)
metric_name = workload_obj.target_metric_name
if split == 'validation':
metric = f'validation/{metric_name}'
target = workload_obj.validation_target_value
elif split == 'test':
metric = f'test/{metric_name}'
target = workload_obj.test_target_value
metric = f'validation/{metric_name}'
target = workload_obj.validation_target_value
return metric, target


Expand Down
23 changes: 23 additions & 0 deletions scoring/utils/slurm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,29 @@ LOGS_BUCKET="algoperf-runs-internal"
sbatch run_jobs.sh
```

## Convenient bash script to launch SLURM jobs

The `run_submission.sh` script performs all of the steps above for you. It is intended to be run on a SLURM login node. However, it expects a specific directory structure: the `algorithmic-efficiency` and `submissions_algorithms` git repositories must both be checked out directly under your `$HOME` directory, and you must run the script from `$HOME`.

```
user@host:~$ tree -L 1
.
├── algorithmic-efficiency
└── submissions_algorithms
```

Run the script with a command like the following:

```
./algorithmic-efficiency/scoring/utils/slurm/run_submission.sh \
  --submission_path submissions_algorithms/submissions/self_tuning/schedule_free_adamw_v2 \
  --dry_run false
```

The `--submission_path` flag points to the directory containing the submission (inside the submissions git repository). `--dry_run` defaults to true, which limits the run to 10 global steps so that an accidental command does not waste cluster resources; explicitly set it to false for full scoring runs.

The script handles the rest for you: it creates the job config, saves it under a reasonably named path, and invokes the sbatch script with the appropriate flags.

# Set up new SLURM cluster

If you are setting up a new cluster, we recommend using the [HPC toolkit to set up a SLURM cluster](https://cloud.google.com/cluster-toolkit/docs/quickstarts/slurm-cluster).
Expand Down
39 changes: 25 additions & 14 deletions scoring/utils/slurm/make_job_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import json
import os
import struct

import jax
from absl import app, flags
Expand All @@ -17,8 +18,6 @@
TUNING_SEARCH_SPACE = (
'reference_algorithms/paper_baselines/adamw/tuning_search_space.json'
)
NUM_TUNING_TRIALS = 3 # For external tuning ruleset
NUM_STUDIES = 3

flags.DEFINE_string(
'submission_path',
Expand All @@ -35,11 +34,6 @@
'experiments',
'Path to experiment dir where logs will be saved.',
)
flags.DEFINE_string(
'experiment_dir',
'experiments/',
'Path to experiment dir where logs will be saved.',
)
flags.DEFINE_enum(
'framework',
'jax',
Expand All @@ -56,14 +50,13 @@
flags.DEFINE_string(
'workloads', None, help='Comma seperated list of workloads to run.'
)
flags.DEFINE_integer('num_studies', NUM_STUDIES, help='Number of studies.')
flags.DEFINE_integer('num_studies', None, help='Number of studies.')
flags.DEFINE_integer('num_tuning_trials', None, help='Number of tuning trials.')

FLAGS = flags.FLAGS

MIN_INT = -(2 ** (31))
MAX_INT = 2 ** (31) - 1
NUM_TUNING_TRIALS = 5 # For external tuning ruleset
NUM_STUDIES = 3

WORKLOADS = {
'imagenet_resnet': {'dataset': 'imagenet'},
Expand All @@ -74,6 +67,12 @@
'librispeech_deepspeech': {'dataset': 'librispeech'},
'criteo1tb': {'dataset': 'criteo1tb'},
'librispeech_conformer': {'dataset': 'librispeech'},
'finewebedu_lm': {'dataset': 'fineweb_edu_10B'}
}

RULESET_CONFIGS = {
'self': {'num_studies': 3, 'num_tuning_trials': 1},
'external': {'num_studies': 3, 'num_tuning_trials': 5},
}


Expand All @@ -83,17 +82,29 @@ def main(_):
else:
workloads = FLAGS.workloads.split(',')

key = jax.random.key(FLAGS.seed)
if not FLAGS.seed:
FLAGS.seed = struct.unpack('I', os.urandom(4))[0]

# Set defaults based on tuning_ruleset if not provided by user
num_studies = FLAGS.num_studies
if num_studies is None:
num_studies = RULESET_CONFIGS[FLAGS.tuning_ruleset]['num_studies']

num_tuning_trials = FLAGS.num_tuning_trials
if num_tuning_trials is None:
num_tuning_trials = RULESET_CONFIGS[FLAGS.tuning_ruleset]['num_tuning_trials']

key = jax.random.PRNGKey(FLAGS.seed)

jobs = []

for workload in workloads:
# Fold in hash(workload) mod(max(uint32))
workload_key = jax.random.fold_in(key, hash(workload) % (2**32 - 1))
for study_index in range(NUM_STUDIES):
for study_index in range(num_studies):
study_key = jax.random.fold_in(workload_key, study_index)
if FLAGS.tuning_ruleset == 'external':
for hparam_index in range(NUM_TUNING_TRIALS):
for hparam_index in range(num_tuning_trials):
run_key = jax.random.fold_in(study_key, hparam_index)
seed = jax.random.randint(run_key, (1,), MIN_INT, MAX_INT)[0].item()
print(seed)
Expand All @@ -107,7 +118,7 @@ def main(_):
job['experiment_dir'] = study_dir
job['rng_seed'] = seed
job['tuning_ruleset'] = FLAGS.tuning_ruleset
job['num_tuning_trials'] = NUM_TUNING_TRIALS
job['num_tuning_trials'] = num_tuning_trials
job['hparam_start_index'] = hparam_index
job['hparam_end_index'] = hparam_index + 1
job['tuning_search_space'] = FLAGS.tuning_search_space
Expand Down
Loading
Loading