Merge pull request #670 from mlcommons/dev

dev -> main

priyakasimbeg authored Mar 5, 2024
2 parents f9791d0 + 24632ad commit 6b188ba

Showing 653 changed files with 297,414 additions and 47 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,20 @@
# Change Log

## algoperf-benchmark-0.1.2 (2024-03-04)
Workload variant additions and fixes:
- Add DeepSpeech workload variants
- Fix bugs in the ImageNet ResNet, WMT, and Criteo 1TB variants

Add prize qualification logs for the external tuning ruleset.
Note: FastMRI trials with dropout are not yet added due to https://github.com/mlcommons/algorithmic-efficiency/issues/664.

Add missing functionality to the Docker startup script for the self-tuning ruleset.
Add a self-tuning ruleset option to the script that runs all workloads for scoring.

Dataset setup fixes.

Fix tests that check for training differences between PyTorch and JAX on GPU.

## algoperf-benchmark-0.1.1 (2024-01-19)
Bug fixes to FastMRI metric calculation and targets.

9 changes: 9 additions & 0 deletions GETTING_STARTED.md
@@ -381,4 +381,13 @@ python score_submissions.py --submission_directory <directory_with_submissions>

We provide the scores and performance profiles for the [paper baseline algorithms](/reference_algorithms/paper_baselines/) in the "Baseline Results" section in [Benchmarking Neural Network Training Algorithms](https://arxiv.org/abs/2306.07179).

## Package Submission for Self-Reporting

To prepare your submission for self-reporting, run:

```
python3 package_logs.py --experiment_dir <experiment_dir> --destination_dir <destination_dir>
```

The destination directory will contain the logs, packed into the studies and trials required for self-reporting.
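
For orientation, the packaged output should look roughly like the sketch below (hypothetical names; the actual study, workload, and trial directories come from your experiment):

```
<destination_dir>/
  study_0/                    # one directory per study
    <workload>/               # e.g. fastmri_jax
      trial_1/
        eval_measurements.csv # metrics read by the scoring scripts
        ...
```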

**Good Luck!**
@@ -88,7 +88,7 @@ def scaled_init(key, shape, dtype=jnp.float_):
stddev=jnp.sqrt(1.0 / mlp_top_dims[layer_idx])))(
top_mlp_input)
x = nn.relu(x)
if self.dropout_rate > 0.0 and layer_idx == num_layers_top - 2:
if self.dropout_rate and layer_idx == num_layers_top - 2:
x = nn.Dropout(rate=self.dropout_rate, deterministic=not train)(x)
top_mlp_input += x
# In the DLRM model the last layer width is always 1. We can hardcode that
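This small guard change is worth a note: if, as elsewhere in the codebase, an unset dropout rate is represented as `None`, the old guard `self.dropout_rate > 0.0` raises a `TypeError` in Python 3, while plain truthiness treats `None` and `0.0` the same way. A minimal standalone sketch (not the workload code):

```python
def dropout_enabled(dropout_rate):
  # Old guard: `dropout_rate > 0.0` -> TypeError when dropout_rate is None.
  # New guard: truthiness is False for both None and 0.0, so dropout is
  # skipped in either case without crashing.
  return bool(dropout_rate)

assert not dropout_enabled(None)  # no crash, dropout skipped
assert not dropout_enabled(0.0)   # dropout skipped
assert dropout_enabled(0.1)       # dropout applied
```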
2 changes: 1 addition & 1 deletion algorithmic_efficiency/workloads/criteo1tb/workload.py
@@ -97,7 +97,7 @@ def max_allowed_runtime_sec(self) -> int:

@property
def eval_period_time_sec(self) -> int:
return 2 * 600 # 20 mins.
return 2 * 60 # 2 mins.

def _build_input_queue(
self,
@@ -33,8 +33,10 @@ def init_model_fn(
use_tanh=self.use_tanh,
use_layer_norm=self.use_layer_norm,
dropout_rate=dropout_rate)

variables = jax.jit(self._model.init)({'params': rng}, fake_batch)
params_rng, dropout_rng = jax.random.split(rng)
variables = jax.jit(
self._model.init)({'params': params_rng, 'dropout': dropout_rng},
fake_batch)
params = variables['params']
self._param_shapes = param_utils.jax_param_shapes(params)
self._param_types = param_utils.jax_param_types(self._param_shapes)
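The fix above addresses a common Flax pitfall: a module containing `nn.Dropout` may require its own `'dropout'` RNG stream at init time, so passing only a `'params'` key is not enough once dropout layers are present. A self-contained sketch of the pattern, with a hypothetical toy model standing in for the workload's:

```python
import jax
import jax.numpy as jnp
import flax.linen as nn

class TinyModel(nn.Module):  # hypothetical stand-in for the workload model
  dropout_rate: float = 0.1

  @nn.compact
  def __call__(self, x, train=False):
    x = nn.Dense(4)(x)
    return nn.Dropout(rate=self.dropout_rate, deterministic=not train)(x)

rng = jax.random.PRNGKey(0)
# Split the incoming key so parameter init and dropout masks never share state.
params_rng, dropout_rng = jax.random.split(rng)
fake_batch = jnp.ones((2, 8))
variables = jax.jit(TinyModel().init)(
    {'params': params_rng, 'dropout': dropout_rng}, fake_batch)
```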
@@ -327,7 +327,7 @@ def _eval_model_on_split(self,
global_step: int = 0) -> Dict[str, float]:
"""Run a full evaluation of the model."""
del global_step
if model_state is not None:
if model_state is not None and len(model_state) > 0:
# Sync batch statistics across replicas before evaluating.
model_state = self.sync_batch_stats(model_state)

@@ -47,7 +47,8 @@ def init_model_fn(
variables = model_init_fn({'params': params_rng, 'dropout': dropout_rng},
*fake_input_batch)

model_state = variables['batch_stats']
model_state = variables[
'batch_stats'] if not self.layernorm_everywhere else {}
params = variables['params']
self._param_shapes = param_utils.jax_param_shapes(params)
self._param_types = param_utils.jax_param_types(self._param_shapes)
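Taken together with the `len(model_state) > 0` guard above, this hunk completes one fix (assuming both belong to the same workload family): variants that use LayerNorm everywhere have no `batch_stats` collection, so the model state becomes an empty dict rather than `None`, and the eval path must check for emptiness before syncing batch statistics across replicas. A minimal sketch of the pairing, with hypothetical names:

```python
def init_state(variables, layernorm_everywhere):
  # No BatchNorm layers -> no 'batch_stats' collection to track.
  return {} if layernorm_everywhere else variables['batch_stats']

def maybe_sync(model_state, sync_batch_stats):
  # Guard on emptiness, not just None: an empty dict has nothing to sync.
  if model_state is not None and len(model_state) > 0:
    model_state = sync_batch_stats(model_state)
  return model_state
```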
2 changes: 1 addition & 1 deletion algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
@@ -121,7 +121,7 @@ def predict_step(self,
max_decode_len: int,
beam_size: int = 4) -> spec.Tensor:
"""Predict translation with fast decoding beam search on a batch."""
config = models.TransformerConfig(deterministic=True, decode=True)
config = replace(self._eval_model.config, decode=True)
# Prepare transformer fast-decoder call for beam search: for beam search, we
# need to set up our decoder model to handle a batch size equal to
# batch_size * beam_size, where each batch item's data is expanded in-place
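This one-line change matters for workload variants: building a fresh `models.TransformerConfig` silently reset every non-default field of the evaluation model's config, whereas `dataclasses.replace` copies the eval config and flips only `decode`. A small illustration with a hypothetical config class:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class TransformerConfig:  # hypothetical stand-in for models.TransformerConfig
  deterministic: bool = False
  decode: bool = False
  attention_temp: float = 1.0  # example of a variant-tuned field

eval_config = TransformerConfig(deterministic=True, attention_temp=1.6)

# Before the fix: variant settings silently fell back to defaults.
old = TransformerConfig(deterministic=True, decode=True)
assert old.attention_temp == 1.0

# After the fix: only `decode` changes; everything else is preserved.
new = replace(eval_config, decode=True)
assert new.attention_temp == 1.6 and new.deterministic
```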
2 changes: 1 addition & 1 deletion algorithmic_efficiency/workloads/workloads.py
@@ -26,7 +26,7 @@
},
'criteo1tb_embed_init': {
'workload_path': 'criteo1tb/criteo1tb',
'workload_class_name': 'Criteo1TbDlrmSmallEmbeddingInitWorkload'
'workload_class_name': 'Criteo1TbDlrmSmallEmbedInitWorkload'
},
'criteo1tb_resnet': {
'workload_path': 'criteo1tb/criteo1tb',
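The registry fix above corrects a simple name mismatch, but it is easy to miss because workload classes are resolved dynamically from these strings, so a stale `workload_class_name` fails only at lookup time. A hypothetical sketch of the lookup pattern (the real resolution in `workloads.py` also accounts for the framework suffix):

```python
import importlib

def load_workload_class(workload_path: str, workload_class_name: str):
  # Hypothetical helper: import the workload module, then fetch the class
  # by its registered name. A misspelled entry such as the old
  # 'Criteo1TbDlrmSmallEmbeddingInitWorkload' raises AttributeError here.
  module_name = ('algorithmic_efficiency.workloads.' +
                 workload_path.replace('/', '.'))
  module = importlib.import_module(module_name)
  return getattr(module, workload_class_name)
```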
10 changes: 5 additions & 5 deletions datasets/dataset_setup.py
@@ -706,13 +706,13 @@ def main(_):
'to download the FastMRI dataset.\nSign up for the URLs at '
'https://fastmri.med.nyu.edu/.')

updated_data_dir = download_fastmri(data_dir,
knee_singlecoil_train_url,
knee_singlecoil_val_url,
knee_singlecoil_test_url)
download_fastmri(data_dir,
knee_singlecoil_train_url,
knee_singlecoil_val_url,
knee_singlecoil_test_url)

logging.info('fastMRI download completed. Extracting...')
setup_fastmri(data_dir, updated_data_dir)
setup_fastmri(data_dir)

if FLAGS.all or FLAGS.imagenet:
flags.mark_flag_as_required('imagenet_train_url')
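For context on this refactor (inferred from the call site alone): `download_fastmri` no longer returns an updated directory, and `setup_fastmri` now locates the downloaded archives from `data_dir` itself. A hypothetical sketch of the simplified contract:

```python
# Hypothetical signatures inferred from the call-site change; the real
# implementations live in datasets/dataset_setup.py.
def download_fastmri(data_dir, train_url, val_url, test_url) -> None:
  """Download the knee_singlecoil tarballs into data_dir."""

def setup_fastmri(data_dir) -> None:
  """Find the downloaded tarballs under data_dir and extract them."""
```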
46 changes: 38 additions & 8 deletions docker/scripts/startup.sh
@@ -50,6 +50,7 @@ HOME_DIR=""
RSYNC_DATA="true"
OVERWRITE="false"
SAVE_CHECKPOINTS="true"
TUNING_RULESET="external"

# Pass flag
while [ "$1" != "" ]; do
@@ -107,6 +108,10 @@ while [ "$1" != "" ]; do
shift
HOME_DIR=$1
;;
--tuning_ruleset)
shift
TUNING_RULESET=$1
;;
--num_tuning_trials)
shift
NUM_TUNING_TRIALS=$1
@@ -157,6 +162,7 @@ VALID_WORKLOADS=("criteo1tb" "imagenet_resnet" "imagenet_resnet_silu" "imagenet_
"librispeech_deepspeech_tanh" \
"librispeech_deepspeech_no_resnet" "librispeech_deepspeech_norm_and_spec_aug"
"fastmri_layernorm" "ogbg_gelu" "ogbg_silu" "ogbg_model_size")
VALID_RULESETS=("self" "external")

# Set data and experiment paths
ROOT_DATA_BUCKET="gs://mlcommons-data"
@@ -167,17 +173,25 @@ EXPERIMENT_DIR="${HOME_DIR}/experiment_runs"

if [[ -n ${DATASET+x} ]]; then
if [[ ! " ${VALID_DATASETS[@]} " =~ " $DATASET " ]]; then
echo "Error: invalid argument for dataset (d)."
echo "Error: invalid argument $DATASET for dataset (d)."
exit 1
fi
fi

if [[ -n ${WORKLOAD+x} ]]; then
if [[ ! " ${VALID_WORKLOADS[@]} " =~ " $WORKLOAD " ]]; then
echo "Error: invalid argument for workload (w)."
echo "Error: invalid argument $WORKLOAD for workload (w)."
exit 1
fi
fi

if [[ -n ${TUNING_RULESET+x} ]]; then
if [[ ! " ${VALID_RULESETS[@]} " =~ " $TUNING_RULESET " ]]; then
echo "Error: invalid argument $TUNING_RULESET for tuning ruleset (tuning_ruleset)."
exit 1
fi
fi
TUNING_RULESET_FLAG="--tuning_ruleset=${TUNING_RULESET}"

# Set run command prefix depending on framework
if [[ "${FRAMEWORK}" == "jax" ]]; then
@@ -243,26 +257,42 @@ if [[ ! -z ${SUBMISSION_PATH+x} ]]; then
if [[ ${FRAMEWORK} == "pytorch" ]]; then
TORCH_COMPILE_FLAG="--torch_compile=true"
fi

# Flags for rulesets
if [[ ${TUNING_RULESET} == "external" ]]; then
TUNING_SEARCH_SPACE_FLAG="--tuning_search_space=${TUNING_SEARCH_SPACE}"
fi

# The TORCH_RUN_COMMAND_PREFIX is only set if FRAMEWORK is "pytorch"
COMMAND="${COMMAND_PREFIX} submission_runner.py \
BASE_COMMAND="${COMMAND_PREFIX} submission_runner.py \
--framework=${FRAMEWORK} \
--workload=${WORKLOAD} \
--submission_path=${SUBMISSION_PATH} \
--tuning_search_space=${TUNING_SEARCH_SPACE} \
--data_dir=${DATA_DIR} \
--num_tuning_trials=1 \
--experiment_dir=${EXPERIMENT_DIR} \
--experiment_name=${EXPERIMENT_NAME} \
--overwrite=${OVERWRITE} \
--save_checkpoints=${SAVE_CHECKPOINTS} \
${NUM_TUNING_TRIALS_FLAG} \
${HPARAM_START_INDEX_FLAG} \
${HPARAM_END_INDEX_FLAG} \
${RNG_SEED_FLAG} \
${MAX_STEPS_FLAG} \
${SPECIAL_FLAGS} \
${TORCH_COMPILE_FLAG} 2>&1 | tee -a ${LOG_FILE}"
${TORCH_COMPILE_FLAG}"

if [[ ${TUNING_RULESET} == "external" ]]; then
COMMAND="${BASE_COMMAND} \
${TUNING_RULESET_FLAG} \
${TUNING_SEARCH_SPACE_FLAG} \
${NUM_TUNING_TRIALS_FLAG} \
${HPARAM_START_INDEX_FLAG} \
${HPARAM_END_INDEX_FLAG}"
else
COMMAND="${BASE_COMMAND} \
${TUNING_RULESET_FLAG}"
fi

COMMAND="$COMMAND 2>&1 | tee -a ${LOG_FILE}"

echo $COMMAND > ${LOG_FILE}
echo $COMMAND
eval $COMMAND
2 changes: 2 additions & 0 deletions prize_qualification_baselines/README.md
@@ -2,6 +2,8 @@

This directory contains the baseline(s) that submissions must beat to qualify for prizes; see the [Scoring Section](/COMPETITION_RULES.md#scoring) of the competition rules. For each ruleset there are two baselines (`*_target_setting.py` and `*_full_budget.py`). A submission must beat both baselines to be eligible for prizes.

The experiment logs with training metrics are in `prize_qualification_baselines/logs`.

## Externally Tuned Ruleset

### JAX