diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml
index c86d07670f..66003bb54e 100644
--- a/cm-mlops/script/app-mlperf-inference/_cm.yaml
+++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml
@@ -291,6 +291,9 @@ variations:
       CM_SKIP_RUN:
       - yes
 
+  qualcomm:
+    alias: kilt
+
   kilt:
     group: implementation
@@ -307,6 +310,26 @@ variations:
       CM_SKIP_RUN:
       - yes
 
+  kilt,qualcomm,resnet50:
+    default_variations:
+      precision: uint8
+
+  kilt,qualcomm,retinanet:
+    default_variations:
+      precision: uint8
+
+  kilt,qualcomm,resnet50:
+    default_variations:
+      precision: uint8
+
+  kilt,qualcomm,bert-99:
+    default_variations:
+      precision: uint8
+
+  kilt,qualcomm,bert-99.9:
+    default_variations:
+      precision: fp16
+
   resnet50:
     group: model
diff --git a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml
index 63a205dcba..d916555a43 100644
--- a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml
+++ b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml
@@ -256,3 +256,30 @@ variations:
     env:
       CATEGORY: edge
       DIVISION: closed
+
+  aws-dl2q.24xlarge:
+    group: sut
+    default_env:
+      EXTRA_ARGS: " --adr.mlperf-inference-implementation.tags=_dl2q.24xlarge"
+      CATEGORY: datacenter
+      DIVISION: closed
+    default_variations:
+      implementation: qualcomm
+
+  aws-dl2q.24xlarge,qualcomm:
+    state:
+      resnet50:
+        qaic:
+          glow:
+            offline_target_qps: 153000
+            server_target_qps: 149000
+      retinanet:
+        qaic:
+          glow:
+            offline_target_qps: 2500
+            server_target_qps: 2200
+      bert-99.9:
+        qaic:
+          glow:
+            offline_target_qps: 350
+            server_target_qps: 300
diff --git a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py
index 336a277cbf..ae6462118b 100644
--- a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py
+++ b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py
@@ -103,6 +103,10 @@ def preprocess(i):
             offline_target_qps = (((state.get(model, {})).get(device, {})).get(backend, {})).get('offline_target_qps')
             if offline_target_qps:
                 add_to_run_cmd += f" --offline_target_qps={offline_target_qps}"
+                server_target_qps = (((state.get(model, {})).get(device, {})).get(backend, {})).get('server_target_qps')
+                if server_target_qps:
+                    add_to_run_cmd += f" --server_target_qps={server_target_qps}"
+
             else: #try to do a test run with reasonable number of samples to get and record the actual system performance
                 if device == "cpu":
                     if model == "resnet50":
@@ -117,7 +121,7 @@
             cmd = f'run_test "{model}" "{backend}" "{test_query_count}" "{implementation}" "{device}" "$find_performance_cmd"'
             cmds.append(cmd)
             #second argument is unused for submission_cmd
-            cmd = f'run_test "{model}" "{backend}" "100" "{implementation}" "{device}" "$submission_cmd" {add_to_run_cmd}'
+            cmd = f'run_test "{model}" "{backend}" "100" "{implementation}" "{device}" "$submission_cmd" "{add_to_run_cmd}"'
 
             singlestream_target_latency = (((state.get(model, {})).get(device, {})).get(backend, {})).get('singlestream_target_latency')
             if singlestream_target_latency:
diff --git a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/run-template.sh b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/run-template.sh
index 47dd7836a7..17c1ffa002 100644
--- a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/run-template.sh
+++ b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/run-template.sh
@@ -34,6 +34,8 @@ function run_test() {
     test_query_count=$3
     implementation=$4
     device=$5
+    EXTRA_RUN_ARGS=$7
+    echo "model=$model, backend=$2, test_query_count=$3, implementation=$4, device=$5, EXTRA_RUN_ARGS=$7"
     run "$6"
 }
 
@@ -47,27 +49,27 @@ find_performance_cmd='cm run script --tags=generate-run-cmds,inference,_find-per
 find_ss_performance_cmd='cm run script --tags=generate-run-cmds,inference,_find-performance \
 --model=$model --implementation=$implementation --device=$device --backend=$backend \
---category=edge --division=open --scenario=SingleStream --quiet --test_query_count=$test_query_count $rerun ${EXTRA_ARGS}'
+--category=edge --division=open --scenario=SingleStream --quiet --test_query_count=$test_query_count $rerun ${EXTRA_RUN_ARGS} ${EXTRA_ARGS}'
 
 submission_cmd='cm run script --tags=generate-run-cmds,inference,_submission,_all-scenarios \
 --model=$model --implementation=$implementation --device=$device --backend=$backend \
 --category=$category --division=$division --quiet \
---skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_ARGS}'
+--skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_RUN_ARGS} ${EXTRA_ARGS}'
 
 submission_cmd_scenario='cm run script --tags=generate-run-cmds,inference,_submission --scenario=$scenario \
 --model=$model --implementation=$implementation --device=$device --backend=$backend \
 --category=$category --division=$division --quiet \
---skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_ARGS}'
+--skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_RUN_ARGS} ${EXTRA_ARGS}'
 
 readme_cmd_single='cm run script --tags=generate-run-cmds,inference,_populate-readme --scenario=$scenario \
 --model=$model --implementation=$implementation --device=$device --backend=$backend \
 --category=$category --division=$division --quiet \
---skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_ARGS}'
+--skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_RUN_ARGS} ${EXTRA_ARGS}'
 
 readme_cmd='cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
 --model=$model --implementation=$implementation --device=$device --backend=$backend \
 --category=$category --division=$division --quiet \
---skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_ARGS}'
+--skip_submission_generation=yes --execution-mode=valid ${POWER_STRING} ${EXTRA_RUN_ARGS} ${EXTRA_ARGS}'
 
 
 tflite_accuracy_cmd='cm run script --tags=run,mobilenet-models,_tflite,_accuracy-only$extra_tags \
diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
index 1b1b0b73ea..9358f41f7b 100644
--- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
+++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
@@ -331,6 +331,7 @@ variations:
       CM_MLPERF_BACKEND_LIB_NAMESPEC: QAic
 
   bs.#:
+    group: batch-size
     env:
       kilt_model_batch_size: "#"
     adr:
@@ -494,9 +495,10 @@ variations:
 
   singlestream:
     group: loadgen-scenario
+    default_variations:
+      batch-size: bs.1
     env:
       CM_MLPERF_LOADGEN_SCENARIO: SingleStream
-      kilt_model_batch_size: 1
     adr:
       qaic-model-compiler:
         tags: _singlestream
@@ -563,6 +565,8 @@ variations:
       qaic_activation_count: "1"
 
   dl2q.24xlarge,resnet50,offline:
+    default_variations:
+      batch-size: bs.8
     env:
       qaic_activation_count: "3"
@@ -575,6 +579,8 @@ variations:
       qaic_activation_count: "14"
 
   dl2q.24xlarge,resnet50,server:
+    default_variations:
+      batch-size: bs.8
     env:
       qaic_activation_count: "3"
@@ -583,5 +589,7 @@ variations:
       qaic_activation_count: "14"
 
   dl2q.24xlarge,retinanet,server:
+    default_variations:
+      batch-size: bs.1
     env:
       qaic_activation_count: "14"
diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml
index 388d6b73e8..3f202b3934 100644
--- a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml
+++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml
@@ -32,7 +32,6 @@ default_env:
   CM_MLPERF_IMPLEMENTATION: reference
   CM_MLPERF_MODEL: resnet50
   CM_MLPERF_RUN_STYLE: test
-  CM_OUTPUT_FOLDER_NAME: test_results
 
 input_mapping:
   backend: CM_MLPERF_BACKEND
diff --git a/cm-mlops/script/run-mlperf-inference-app/customize.py b/cm-mlops/script/run-mlperf-inference-app/customize.py
index 5ef6a63ac4..66d299d127 100644
--- a/cm-mlops/script/run-mlperf-inference-app/customize.py
+++ b/cm-mlops/script/run-mlperf-inference-app/customize.py
@@ -138,8 +138,12 @@ def preprocess(i):
             add_deps_recursive['mlperf-inference-implementation'] = {}
         add_deps_recursive['mlperf-inference-implementation']['tags'] = "_batch_size."+env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE']
 
-    if clean and 'OUTPUT_BASE_DIR' in env:
-        path_to_clean = os.path.join(env['OUTPUT_BASE_DIR'], env['CM_OUTPUT_FOLDER_NAME'])
+    if env.get('CM_OUTPUT_FOLDER_NAME', '') == '':
+        env['CM_OUTPUT_FOLDER_NAME'] = env['CM_MLPERF_RUN_STYLE'] + "_results"
+
+    output_dir = os.path.join(env['OUTPUT_BASE_DIR'], env['CM_OUTPUT_FOLDER_NAME'])
+    if clean:
+        path_to_clean = output_dir
 
         print ('=========================================================')
         print ('Cleaning results in {}'.format(path_to_clean))
@@ -199,6 +203,8 @@ def preprocess(i):
         result_table, headers = mlperf_utils.get_result_table(state["cm-mlperf-inference-results"][sut])
         print(tabulate(result_table, headers = headers, tablefmt="pretty"))
 
+    print(f"\nThe MLPerf inference results are stored at {output_dir}\n")
+
     return {'return':0}
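
Note on how the new state values are consumed (illustrative, not part of the patch): benchmark-any-mlperf-inference-implementation/customize.py resolves the per-model target QPS through nested dict lookups keyed by model, device and backend, so the YAML block added under aws-dl2q.24xlarge,qualcomm maps directly onto that lookup chain. A minimal standalone Python sketch, reusing the resnet50/qaic/glow numbers from the YAML above:

    # Standalone sketch; the dict mirrors the "state" block added for aws-dl2q.24xlarge,qualcomm.
    state = {
        "resnet50": {
            "qaic": {
                "glow": {
                    "offline_target_qps": 153000,
                    "server_target_qps": 149000,
                }
            }
        }
    }

    model, device, backend = "resnet50", "qaic", "glow"
    add_to_run_cmd = ""

    # Same lookup pattern as customize.py: missing keys fall through to {} and yield None.
    offline_target_qps = state.get(model, {}).get(device, {}).get(backend, {}).get('offline_target_qps')
    if offline_target_qps:
        add_to_run_cmd += f" --offline_target_qps={offline_target_qps}"
        server_target_qps = state.get(model, {}).get(device, {}).get(backend, {}).get('server_target_qps')
        if server_target_qps:
            add_to_run_cmd += f" --server_target_qps={server_target_qps}"

    print(add_to_run_cmd)   # " --offline_target_qps=153000 --server_target_qps=149000"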
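
Why the generated command now quotes {add_to_run_cmd}: run_test receives all extra flags through the single positional parameter $7 (EXTRA_RUN_ARGS), so an unquoted multi-flag string is word-split across $7 and $8 and only the first flag survives. A small sketch of that effect (plain Python, using shlex only to mimic the shell's word splitting, not variable expansion; the flag values are taken from the diff):

    import shlex

    # add_to_run_cmd as built for resnet50 on the dl2q.24xlarge SUT.
    add_to_run_cmd = " --offline_target_qps=153000 --server_target_qps=149000"

    # Command string as generated before the patch (extra args unquoted) ...
    before = f'run_test "resnet50" "glow" "100" "qualcomm" "qaic" "$submission_cmd" {add_to_run_cmd}'
    # ... and after the patch (extra args wrapped in one pair of quotes).
    after  = f'run_test "resnet50" "glow" "100" "qualcomm" "qaic" "$submission_cmd" "{add_to_run_cmd}"'

    # Positional parameter $7 of run_test corresponds to token index 7 here.
    print(shlex.split(before)[7:])  # ['--offline_target_qps=153000', '--server_target_qps=149000'] -> split over $7/$8
    print(shlex.split(after)[7:])   # [' --offline_target_qps=153000 --server_target_qps=149000'] -> one argument in $7

Inside the command templates, EXTRA_RUN_ARGS then carries these per-invocation flags alongside EXTRA_ARGS, which keeps holding the SUT-level defaults such as the _dl2q.24xlarge implementation tag set in the aws-dl2q.24xlarge variation.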
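
The run-mlperf-inference-app change drops the fixed test_results folder name and instead derives it from the run style, then prints the resolved path at the end of the run. A small sketch of the resulting defaulting rule (the OUTPUT_BASE_DIR path and the 'valid' run style shown here are illustrative assumptions):

    import os

    # Sketch of the new output-folder defaulting (mirrors run-mlperf-inference-app/customize.py).
    def resolve_output_dir(env):
        if env.get('CM_OUTPUT_FOLDER_NAME', '') == '':
            env['CM_OUTPUT_FOLDER_NAME'] = env['CM_MLPERF_RUN_STYLE'] + "_results"
        return os.path.join(env['OUTPUT_BASE_DIR'], env['CM_OUTPUT_FOLDER_NAME'])

    print(resolve_output_dir({'CM_MLPERF_RUN_STYLE': 'test',  'OUTPUT_BASE_DIR': '/tmp/mlperf'}))   # /tmp/mlperf/test_results
    print(resolve_output_dir({'CM_MLPERF_RUN_STYLE': 'valid', 'OUTPUT_BASE_DIR': '/tmp/mlperf'}))   # /tmp/mlperf/valid_results

An explicitly provided CM_OUTPUT_FOLDER_NAME still wins, since the default is applied only when the variable is empty.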