diff --git a/language/llama2-70b/README.md b/language/llama2-70b/README.md
index 6c4b1c36a..a9a150b4c 100644
--- a/language/llama2-70b/README.md
+++ b/language/llama2-70b/README.md
@@ -8,6 +8,11 @@
 
 ## Prepare environment
 
+Copy the mlperf.conf file to this folder.
+```
+cp ../../mlperf.conf .
+```
+
 For a CPU-only run:
 
 ```
diff --git a/language/llama2-70b/mlperf.conf b/language/llama2-70b/mlperf.conf
deleted file mode 100644
index 28c19bddf..000000000
--- a/language/llama2-70b/mlperf.conf
+++ /dev/null
@@ -1,74 +0,0 @@
-# The format of this config file is 'key = value'.
-# The key has the format 'model.scenario.key'. Value is mostly int64_t.
-# Model maybe '*' as wildcard. In that case the value applies to all models.
-# All times are in milli seconds
-
-# Set performance_sample_count for each model.
-# User can optionally set this to higher values in user.conf.
-resnet50.*.performance_sample_count_override = 1024
-ssd-mobilenet.*.performance_sample_count_override = 256
-retinanet.*.performance_sample_count_override = 64
-bert.*.performance_sample_count_override = 10833
-dlrm.*.performance_sample_count_override = 204800
-dlrm-v2.*.performance_sample_count_override = 204800
-rnnt.*.performance_sample_count_override = 2513
-# set to 0 to let entire sample set to be performance sample
-3d-unet.*.performance_sample_count_override = 0
-
-# Set seeds. The seeds will be distributed two weeks before the submission.
-*.*.qsl_rng_seed = 148687905518835231
-*.*.sample_index_rng_seed = 520418551913322573
-*.*.schedule_rng_seed = 811580660758947900
-# Set seeds for TEST_05. The seeds will be distributed two weeks before the submission.
-*.*.test05_qsl_rng_seed = 793197339507417767
-*.*.test05_sample_index_rng_seed = 255610748586851044
-*.*.test05_schedule_rng_seed = 352213341366340113
-
-
-*.SingleStream.target_latency_percentile = 90
-*.SingleStream.min_duration = 600000
-#*.SingleStream.min_query_count = 1024
-
-*.MultiStream.target_latency_percentile = 99
-*.MultiStream.samples_per_query = 8
-*.MultiStream.min_duration = 600000
-#*.MultiStream.min_query_count = 270336
-*.MultiStream.min_query_count = 662
-retinanet.MultiStream.target_latency = 528
-
-# 3D-UNet uses equal issue mode
-3d-unet.*.sample_concatenate_permutation = 1
-
-# GPT-J uses equal issue mode for Single-Stream
-gptj.SingleStream.sample_concatenate_permutation = 1
-
-*.Server.target_latency = 10
-*.Server.target_latency_percentile = 99
-*.Server.target_duration = 0
-*.Server.min_duration = 600000
-#*.Server.min_query_count = 270336
-resnet50.Server.target_latency = 15
-retinanet.Server.target_latency = 100
-bert.Server.target_latency = 130
-dlrm.Server.target_latency = 60
-dlrm-v2.Server.target_latency = 60
-rnnt.Server.target_latency = 1000
-gptj.Server.target_latency = 20000
-
-# Falcon Server scenario requires two latency constraints
-llama2-70b.Server.target_latency = 2000
-llama2-70b.Server.ttft_latency = 2000
-llama2-70b.Server.tpot_latency = 200
-
-*.Offline.target_latency_percentile = 90
-*.Offline.min_duration = 600000
-# In Offline scenario, we always have one query. But LoadGen maps this to
-# min_sample_count internally in Offline scenario, so set this to 24576 since
-# the rule requires that Offline scenario run for at least 24576 samples.
-*.Offline.min_query_count = 24576
-
-# These fields should be defined and overridden by user.conf.
-*.SingleStream.target_latency = 10
-*.MultiStream.target_latency = 80
-*.Server.target_qps = 1.0
-*.Offline.target_qps = 1.0
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index e3679cd30..e61590e34 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -1281,6 +1281,23 @@
     "compliance_accuracy.txt",
 ]
 
+OFFLINE_MIN_SPQ_SINCE_V4 = {
+    "resnet": 24576,
+    "retinanet": 24576,
+    "bert-99": 10833,
+    "bert-99.9": 10833,
+    "dlrm-v2-99": 24576,
+    "dlrm-v2-99.9": 24576,
+    "3d-unet-99": 43,
+    "3d-unet-99.9": 43,
+    "rnnt": 2513,
+    "gptj-99": 13368,
+    "gptj-99.9": 13368,
+    "llama2-70b-99": 24576,
+    "llama2-70b-99.9": 24576,
+    "stable-diffusion-xl": 5000
+}
+
 SCENARIO_MAPPING = {
     "singlestream": "SingleStream",
     "multistream": "MultiStream",
@@ -1526,7 +1543,7 @@ def __init__(
         self.seeds = self.base["seeds"]
         self.test05_seeds = self.base["test05_seeds"]
         self.accuracy_target = self.base["accuracy-target"]
-        self.accuracy_upper_limit = self.base["accuracy-upper-limit"]
+        self.accuracy_upper_limit = self.base.get("accuracy-upper-limit", {})
         self.performance_sample_count = self.base["performance-sample-count"]
         self.latency_constraint = self.base.get("latency-constraint", {})
         self.min_queries = self.base.get("min-queries", {})
@@ -2082,7 +2099,12 @@ def check_performance_dir(
             )
             is_valid = False
 
-        if scenario == "Offline" and (samples_per_query < OFFLINE_MIN_SPQ):
+        # Flat floor through v3.1; per-model Offline floors from v4.0 on.
+        if config.version in ["v0.5", "v0.7", "v1.0", "v1.1", "v2.0", "v2.1", "v3.0", "v3.1"]:
+            offline_min_spq = OFFLINE_MIN_SPQ
+        else:
+            offline_min_spq = OFFLINE_MIN_SPQ_SINCE_V4.get(model, OFFLINE_MIN_SPQ)
+        if scenario == "Offline" and (samples_per_query < offline_min_spq):
             log.error(
                 "%s Required minimum samples per query not met by user config, Expected=%s, Found=%s",
                 fname,
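
Review note: from v4.0 on, the checker replaces the flat `OFFLINE_MIN_SPQ` floor with the per-model `OFFLINE_MIN_SPQ_SINCE_V4` table added above. The sketch below restates that rule as a standalone helper so the gating logic can be exercised in isolation; `offline_spq_ok` and `LEGACY_VERSIONS` are illustrative names, not part of submission_checker.py, and the fallback to the flat floor for unlisted models is an assumption. The numeric floors are copied from the diff.

```python
# Standalone sketch of the version-gated Offline samples_per_query rule.
OFFLINE_MIN_SPQ = 24576  # flat floor applied through v3.1

OFFLINE_MIN_SPQ_SINCE_V4 = {
    "resnet": 24576, "retinanet": 24576,
    "bert-99": 10833, "bert-99.9": 10833,
    "dlrm-v2-99": 24576, "dlrm-v2-99.9": 24576,
    "3d-unet-99": 43, "3d-unet-99.9": 43,
    "rnnt": 2513,
    "gptj-99": 13368, "gptj-99.9": 13368,
    "llama2-70b-99": 24576, "llama2-70b-99.9": 24576,
    "stable-diffusion-xl": 5000,
}

LEGACY_VERSIONS = {"v0.5", "v0.7", "v1.0", "v1.1", "v2.0", "v2.1", "v3.0", "v3.1"}


def offline_spq_ok(version: str, model: str, samples_per_query: int) -> bool:
    """Return True when an Offline run meets its minimum samples_per_query."""
    if version in LEGACY_VERSIONS:
        floor = OFFLINE_MIN_SPQ  # one floor for every model through v3.1
    else:
        # Per-model floor from v4.0 on; unlisted models fall back to the
        # flat floor (an assumption; the patch indexes the table directly).
        floor = OFFLINE_MIN_SPQ_SINCE_V4.get(model, OFFLINE_MIN_SPQ)
    return samples_per_query >= floor


assert offline_spq_ok("v3.1", "3d-unet-99", 24576)    # flat floor met
assert not offline_spq_ok("v4.0", "gptj-99", 12000)   # per-model floor is 13368
assert offline_spq_ok("v4.0", "3d-unet-99", 43)       # 3D-UNet needs only 43
```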