diff --git a/language/llama2-70b/README.md b/language/llama2-70b/README.md
index 6c4b1c36a..a9a150b4c 100644
--- a/language/llama2-70b/README.md
+++ b/language/llama2-70b/README.md
@@ -8,6 +8,11 @@
 
 ## Prepare environment
 
+Copy the mlperf.conf file to this folder.
+```
+cp ../../mlperf.conf .
+```
+
 For a CPU-only run:
 
 ```
diff --git a/language/llama2-70b/mlperf.conf b/language/llama2-70b/mlperf.conf
deleted file mode 100644
index 28c19bddf..000000000
--- a/language/llama2-70b/mlperf.conf
+++ /dev/null
@@ -1,74 +0,0 @@
-# The format of this config file is 'key = value'.
-# The key has the format 'model.scenario.key'. Value is mostly int64_t.
-# Model maybe '*' as wildcard. In that case the value applies to all models.
-# All times are in milli seconds
-
-# Set performance_sample_count for each model.
-# User can optionally set this to higher values in user.conf.
-resnet50.*.performance_sample_count_override = 1024
-ssd-mobilenet.*.performance_sample_count_override = 256
-retinanet.*.performance_sample_count_override = 64
-bert.*.performance_sample_count_override = 10833
-dlrm.*.performance_sample_count_override = 204800
-dlrm-v2.*.performance_sample_count_override = 204800
-rnnt.*.performance_sample_count_override = 2513
-# set to 0 to let entire sample set to be performance sample
-3d-unet.*.performance_sample_count_override = 0
-
-# Set seeds. The seeds will be distributed two weeks before the submission.
-*.*.qsl_rng_seed = 148687905518835231
-*.*.sample_index_rng_seed = 520418551913322573
-*.*.schedule_rng_seed = 811580660758947900
-# Set seeds for TEST_05. The seeds will be distributed two weeks before the submission.
-*.*.test05_qsl_rng_seed = 793197339507417767
-*.*.test05_sample_index_rng_seed = 255610748586851044
-*.*.test05_schedule_rng_seed = 352213341366340113
-
-
-*.SingleStream.target_latency_percentile = 90
-*.SingleStream.min_duration = 600000
-#*.SingleStream.min_query_count = 1024
-
-*.MultiStream.target_latency_percentile = 99
-*.MultiStream.samples_per_query = 8
-*.MultiStream.min_duration = 600000
-#*.MultiStream.min_query_count = 270336
-*.MultiStream.min_query_count = 662
-retinanet.MultiStream.target_latency = 528
-
-# 3D-UNet uses equal issue mode
-3d-unet.*.sample_concatenate_permutation = 1
-
-# GPT-J uses equal issue mode for Single-Stream
-gptj.SingleStream.sample_concatenate_permutation = 1
-
-*.Server.target_latency = 10
-*.Server.target_latency_percentile = 99
-*.Server.target_duration = 0
-*.Server.min_duration = 600000
-#*.Server.min_query_count = 270336
-resnet50.Server.target_latency = 15
-retinanet.Server.target_latency = 100
-bert.Server.target_latency = 130
-dlrm.Server.target_latency = 60
-dlrm-v2.Server.target_latency = 60
-rnnt.Server.target_latency = 1000
-gptj.Server.target_latency = 20000
-
-# Falcon Server scenario requires two latency constraints
-llama2-70b.Server.target_latency = 2000
-llama2-70b.Server.ttft_latency = 2000
-llama2-70b.Server.tpot_latency = 200
-
-*.Offline.target_latency_percentile = 90
-*.Offline.min_duration = 600000
-# In Offline scenario, we always have one query. But LoadGen maps this to
-# min_sample_count internally in Offline scenario, so set this to 24576 since
-# the rule requires that Offline scenario run for at least 24576 samples.
-*.Offline.min_query_count = 24576
-
-# These fields should be defined and overridden by user.conf.
-*.SingleStream.target_latency = 10
-*.MultiStream.target_latency = 80
-*.Server.target_qps = 1.0
-*.Offline.target_qps = 1.0
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index e3679cd30..e61590e34 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -1281,6 +1281,23 @@
     "compliance_accuracy.txt",
 ]
 
+OFFLINE_MIN_SPQ_SINCE_V4 = {
+    "resnet": 24576,
+    "retinanet": 24576,
+    "bert-99": 10833,
+    "bert-99.9": 10833,
+    "dlrm-v2-99": 24576,
+    "dlrm-v2-99.9": 24576,
+    "3d-unet-99": 43,
+    "3d-unet-99.9": 43,
+    "rnnt": 2513,
+    "gptj-99": 13368,
+    "gptj-99.9": 13368,
+    "llama2-70b-99": 24576,
+    "llama2-70b-99.9": 24576,
+    "stable-diffusion-xl": 5000
+}
+
 SCENARIO_MAPPING = {
     "singlestream": "SingleStream",
     "multistream": "MultiStream",
@@ -1526,7 +1543,7 @@ def __init__(
         self.seeds = self.base["seeds"]
         self.test05_seeds = self.base["test05_seeds"]
         self.accuracy_target = self.base["accuracy-target"]
-        self.accuracy_upper_limit = self.base["accuracy-upper-limit"]
+        self.accuracy_upper_limit = self.base.get("accuracy-upper-limit", {})
         self.performance_sample_count = self.base["performance-sample-count"]
         self.latency_constraint = self.base.get("latency-constraint", {})
         self.min_queries = self.base.get("min-queries", {})
@@ -2082,7 +2099,12 @@ def check_performance_dir(
             )
             is_valid = False
 
-        if scenario == "Offline" and (samples_per_query < OFFLINE_MIN_SPQ):
+        # Flat floor through v3.1; per-model Offline floors from v4.0 on.
+        if config.version in ["v0.5", "v0.7", "v1.0", "v1.1", "v2.0", "v2.1", "v3.0", "v3.1"]:
+            offline_min_spq = OFFLINE_MIN_SPQ
+        else:
+            offline_min_spq = OFFLINE_MIN_SPQ_SINCE_V4.get(model, OFFLINE_MIN_SPQ)
+        if scenario == "Offline" and (samples_per_query < offline_min_spq):
             log.error(
                 "%s Required minimum samples per query not met by user config, Expected=%s, Found=%s",
                 fname,
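
Review note: from v4.0 on, the checker replaces the flat `OFFLINE_MIN_SPQ` floor with the per-model `OFFLINE_MIN_SPQ_SINCE_V4` table added above. The sketch below restates that rule as a standalone helper so the gating logic can be exercised in isolation; `offline_spq_ok` and `LEGACY_VERSIONS` are illustrative names, not part of submission_checker.py, and the fallback to the flat floor for unlisted models is an assumption. The numeric floors are copied from the diff.

```python
# Standalone sketch of the version-gated Offline samples_per_query rule.
OFFLINE_MIN_SPQ = 24576  # flat floor applied through v3.1

OFFLINE_MIN_SPQ_SINCE_V4 = {
    "resnet": 24576, "retinanet": 24576,
    "bert-99": 10833, "bert-99.9": 10833,
    "dlrm-v2-99": 24576, "dlrm-v2-99.9": 24576,
    "3d-unet-99": 43, "3d-unet-99.9": 43,
    "rnnt": 2513,
    "gptj-99": 13368, "gptj-99.9": 13368,
    "llama2-70b-99": 24576, "llama2-70b-99.9": 24576,
    "stable-diffusion-xl": 5000,
}

LEGACY_VERSIONS = {"v0.5", "v0.7", "v1.0", "v1.1", "v2.0", "v2.1", "v3.0", "v3.1"}


def offline_spq_ok(version: str, model: str, samples_per_query: int) -> bool:
    """Return True when an Offline run meets its minimum samples_per_query."""
    if version in LEGACY_VERSIONS:
        floor = OFFLINE_MIN_SPQ  # one floor for every model through v3.1
    else:
        # Per-model floor from v4.0 on; unlisted models fall back to the
        # flat floor (an assumption; the patch indexes the table directly).
        floor = OFFLINE_MIN_SPQ_SINCE_V4.get(model, OFFLINE_MIN_SPQ)
    return samples_per_query >= floor


assert offline_spq_ok("v3.1", "3d-unet-99", 24576)    # flat floor met
assert not offline_spq_ok("v4.0", "gptj-99", 12000)   # per-model floor is 13368
assert offline_spq_ok("v4.0", "3d-unet-99", 43)       # 3D-UNet needs only 43
```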