From 155fc1805dcb7a633e3f2de84fc485a8ddce3e84 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Wed, 11 Sep 2024 14:38:09 +0800 Subject: [PATCH] security-fix: resolve security issue with subprocess call using shell=True - Issue: Fixed [B602:subprocess_popen_with_shell_equals_true] identified by Bandit, which flagged the use of `subprocess.Popen` with `shell=True` as a high-severity security risk (CWE-78: OS Command Injection). - Ensures that the command is executed more securely without exposing it to shell injection vulnerabilities. Signed-off-by: ChengyuZhu6 --- benchmarks/auto_benchmark.py | 7 +- benchmarks/utils/common.py | 8 +- benchmarks/windows_install_dependencies.py | 6 +- binaries/s3_binary_upload.py | 7 +- .../chat_app/docker/torchserve_server_app.py | 4 +- .../intel_extension_for_pytorch/intel_gpu.py | 6 +- examples/text_classification/run_script.py | 4 +- setup.py | 18 ++-- test/pytest/test_benchmark.py | 4 +- test/pytest/test_example_dcgan.py | 3 +- test/pytest/test_torch_compile.py | 84 ++++++++++--------- test/pytest/test_torch_xla.py | 69 ++++++++------- test/pytest/test_utils.py | 6 +- ts_scripts/marsgen.py | 4 +- ts_scripts/print_env_info.py | 5 +- ts_scripts/utils.py | 5 +- .../test_integration_workflow_archiver.py | 5 +- 17 files changed, 143 insertions(+), 102 deletions(-) diff --git a/benchmarks/auto_benchmark.py b/benchmarks/auto_benchmark.py index c0d132dd2a..7a81c3da92 100644 --- a/benchmarks/auto_benchmark.py +++ b/benchmarks/auto_benchmark.py @@ -1,6 +1,7 @@ import argparse import datetime import os +import shlex import shutil from subprocess import Popen @@ -259,9 +260,13 @@ def clean_up_benchmark_env(bm_config): def execute(command, wait=False, stdout=None, stderr=None, shell=True): print("execute: {}".format(command)) + + # Split the command into a list of arguments + if isinstance(command, str): + command = shlex.split(command) + cmd = Popen( command, - shell=shell, close_fds=True, stdout=stdout, stderr=stderr, diff --git 
a/benchmarks/utils/common.py b/benchmarks/utils/common.py index aa2cb937c6..5e46f7f1e3 100644 --- a/benchmarks/utils/common.py +++ b/benchmarks/utils/common.py @@ -1,12 +1,16 @@ import os +import shlex from subprocess import Popen -def execute(command, wait=False, stdout=None, stderr=None, shell=True): +def execute(command, wait=False, stdout=None, stderr=None): print(command) + # Split the command into a list of arguments + if isinstance(command, str): + command = shlex.split(command) + cmd = Popen( command, - shell=shell, close_fds=True, stdout=stdout, stderr=stderr, diff --git a/benchmarks/windows_install_dependencies.py b/benchmarks/windows_install_dependencies.py index c7e153f02d..ac1cf73024 100644 --- a/benchmarks/windows_install_dependencies.py +++ b/benchmarks/windows_install_dependencies.py @@ -5,12 +5,14 @@ import subprocess import locale import shutil +import shlex import argparse def run(command): """Returns (return-code, stdout, stderr)""" - p = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) + if isinstance(command, str): + command = shlex.split(command) + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) raw_output, raw_err = p.communicate() rc = p.returncode enc = locale.getpreferredencoding() diff --git a/binaries/s3_binary_upload.py b/binaries/s3_binary_upload.py index 1d66680794..a8e61d678d 100644 --- a/binaries/s3_binary_upload.py +++ b/binaries/s3_binary_upload.py @@ -2,6 +2,7 @@ import glob import logging import os +import shlex import subprocess import sys @@ -39,10 +40,10 @@ def s3_upload_local_folder(self, local_folder_path: str): """ LOGGER.info(f"Uploading *.whl files from folder: {local_folder_path}") s3_command = f"{self.s3_command} --exclude '*' --include '*.whl' {local_folder_path} {self.s3_bucket.rstrip('/')}/whl/{self.channel}" - + s3_command = shlex.split(s3_command) try: - ret_code = subprocess.run( - s3_command, check=True, stdout=subprocess.PIPE, 
universal_newlines=True, shell=True + subprocess.run( + s3_command, check=True, stdout=subprocess.PIPE, universal_newlines=True ) except subprocess.CalledProcessError as e: LOGGER.info(f"S3 upload command failed: {s3_command}. Exception: {e}") diff --git a/examples/LLM/llama/chat_app/docker/torchserve_server_app.py b/examples/LLM/llama/chat_app/docker/torchserve_server_app.py index adf6a31112..22a105bc90 100644 --- a/examples/LLM/llama/chat_app/docker/torchserve_server_app.py +++ b/examples/LLM/llama/chat_app/docker/torchserve_server_app.py @@ -15,9 +15,9 @@ def start_server(): + commands = ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties"] subprocess.run( - ["torchserve --start --ts-config /home/model-server/config.properties"], - shell=True, + commands, check=True, ) while True: diff --git a/examples/intel_extension_for_pytorch/intel_gpu.py b/examples/intel_extension_for_pytorch/intel_gpu.py index 06a29da572..ffe3acc608 100644 --- a/examples/intel_extension_for_pytorch/intel_gpu.py +++ b/examples/intel_extension_for_pytorch/intel_gpu.py @@ -1,5 +1,6 @@ import csv import logging +import shlex import subprocess from io import StringIO @@ -11,8 +12,11 @@ def check_cmd(cmd): out = None + # Split the command into a list of arguments + if isinstance(cmd, str): + cmd = shlex.split(cmd) try: - out = subprocess.check_output(cmd, shell=True, timeout=5, text=True) + out = subprocess.check_output(cmd, timeout=5, text=True) except subprocess.TimeoutExpired: logging.error("Timeout running %s", cmd) except FileNotFoundError: diff --git a/examples/text_classification/run_script.py b/examples/text_classification/run_script.py index 1408f3a8fe..a6706db048 100644 --- a/examples/text_classification/run_script.py +++ b/examples/text_classification/run_script.py @@ -3,5 +3,5 @@ os.makedirs(".data",exist_ok=True) -cmd="python train.py AG_NEWS --device cpu --save-model-path model.pt --dictionary source_vocab.pt" -subprocess.run(cmd, 
shell=True,check=True) +cmd = ["python", "train.py", "AG_NEWS", "--device", "cpu", "--save-model-path", "model.pt", "--dictionary", "source_vocab.pt"] +subprocess.run(cmd, check=True) diff --git a/setup.py b/setup.py index d5a119ef38..09d02482b2 100644 --- a/setup.py +++ b/setup.py @@ -33,14 +33,14 @@ pkgs = find_packages(exclude=["ts_scripts", "test"]) build_frontend_command = { - "Windows": ".\\frontend\\gradlew.bat -p frontend clean assemble", - "Darwin": "frontend/gradlew -p frontend clean assemble", - "Linux": "frontend/gradlew -p frontend clean assemble", + "Windows": [".\\frontend\\gradlew.bat", "-p", "frontend", "clean", "assemble"], + "Darwin": ["frontend/gradlew", "-p", "frontend", "clean", "assemble"], + "Linux": ["frontend/gradlew", "-p", "frontend", "clean", "assemble"], } build_plugins_command = { - "Windows": ".\\plugins\\gradlew.bat -p plugins clean bS", - "Darwin": "plugins/gradlew -p plugins clean bS", - "Linux": "plugins/gradlew -p plugins clean bS", + "Windows": [".\\plugins\\gradlew.bat", "-p", "plugins", "clean", "bS"], + "Darwin": ["plugins/gradlew", "-p", "plugins", "clean", "bS"], + "Linux": ["plugins/gradlew", "-p", "plugins", "clean", "bS"], } @@ -94,7 +94,7 @@ def run(self): os.remove(self.source_server_file) try: - subprocess.check_call(build_frontend_command[platform.system()], shell=True) + subprocess.check_call(build_frontend_command[platform.system()]) except OSError: assert 0, "build failed" copy2(self.source_server_file, self.dest_file_name) @@ -131,9 +131,7 @@ def run(self): try: if self.plugins == "endpoints": - subprocess.check_call( - build_plugins_command[platform.system()], shell=True - ) + subprocess.check_call(build_plugins_command[platform.system()]) else: raise OSError("No such rule exists") except OSError: diff --git a/test/pytest/test_benchmark.py b/test/pytest/test_benchmark.py index f18fc9d1a6..db779666d7 100644 --- a/test/pytest/test_benchmark.py +++ b/test/pytest/test_benchmark.py @@ -40,10 +40,10 @@ def 
test_benchmark_e2e(backend): os.chdir(REPO_ROOT_DIR / "benchmarks") cmd = subprocess.Popen( - f"{sys.executable} ./benchmark-ab.py --concurrency 1 --requests 10 -bb {backend} --generate_graphs True", - shell=True, + [sys.executable, "./benchmark-ab.py", "--concurrency", "1", "--requests", "10", "-bb", backend, "--generate_graphs", "True"], stdout=subprocess.PIPE, ) + output_lines = list(cmd.stdout) assert output_lines[-1].decode("utf-8") == "Test suite execution complete.\n" diff --git a/test/pytest/test_example_dcgan.py b/test/pytest/test_example_dcgan.py index 39b0791cf8..44e938a617 100644 --- a/test/pytest/test_example_dcgan.py +++ b/test/pytest/test_example_dcgan.py @@ -42,8 +42,7 @@ def teardown_module(): def create_example_mar(): # Create only if not already present if not os.path.exists(DCGAN_MAR_FILE): - create_mar_cmd = "cd " + DCGAN_EXAMPLE_DIR + ";./create_mar.sh" - subprocess.check_call(create_mar_cmd, shell=True) + subprocess.check_call(["./create_mar.sh"], cwd=DCGAN_EXAMPLE_DIR) def delete_example_mar(): diff --git a/test/pytest/test_torch_compile.py b/test/pytest/test_torch_compile.py index 22d981d8eb..d9d246afe6 100644 --- a/test/pytest/test_torch_compile.py +++ b/test/pytest/test_torch_compile.py @@ -68,27 +68,39 @@ def chdir_example(monkeypatch): @pytest.mark.skipif(PT_2_AVAILABLE == False, reason="torch version is < 2.0.0") class TestTorchCompile: def teardown_class(self): - subprocess.run("torchserve --stop", shell=True, check=True) + subprocess.run(["torchserve", "--stop"], check=True) time.sleep(10) def test_archive_model_artifacts(self): assert len(glob.glob(MODEL_FILE)) == 1 assert len(glob.glob(YAML_CONFIG_STR)) == 1 assert len(glob.glob(YAML_CONFIG_DICT)) == 1 - subprocess.run(f"cd {TEST_DATA_DIR} && python model.py", shell=True, check=True) - subprocess.run(f"mkdir -p {MODEL_STORE_DIR}", shell=True, check=True) + subprocess.run(["python", "model.py"], cwd=TEST_DATA_DIR, check=True) + os.makedirs(MODEL_STORE_DIR, exist_ok=True) # register 
2 models, one with the backend as str config, the other with the kwargs as dict config - subprocess.run( - f"torch-model-archiver --model-name {MODEL_NAME}_str --version 1.0 --model-file {MODEL_FILE} --serialized-file {SERIALIZED_FILE} --config-file {YAML_CONFIG_STR} --export-path {MODEL_STORE_DIR} --handler {HANDLER_FILE} -f", - shell=True, - check=True, - ) - subprocess.run( - f"torch-model-archiver --model-name {MODEL_NAME}_dict --version 1.0 --model-file {MODEL_FILE} --serialized-file {SERIALIZED_FILE} --config-file {YAML_CONFIG_DICT} --export-path {MODEL_STORE_DIR} --handler {HANDLER_FILE} -f", - shell=True, - check=True, - ) + subprocess.run([ + "torch-model-archiver", + "--model-name", f"{MODEL_NAME}_str", + "--version", "1.0", + "--model-file", MODEL_FILE, + "--serialized-file", SERIALIZED_FILE, + "--config-file", YAML_CONFIG_STR, + "--export-path", MODEL_STORE_DIR, + "--handler", HANDLER_FILE, + "-f" + ], check=True) + subprocess.run([ + "torch-model-archiver", + "--model-name", f"{MODEL_NAME}_dict", + "--version", "1.0", + "--model-file", MODEL_FILE, + "--serialized-file", SERIALIZED_FILE, + "--config-file", YAML_CONFIG_DICT, + "--export-path", MODEL_STORE_DIR, + "--handler", HANDLER_FILE, + "-f" + ], check=True) assert len(glob.glob(SERIALIZED_FILE)) == 1 assert ( len(glob.glob(os.path.join(MODEL_STORE_DIR, f"{MODEL_NAME}_str.mar"))) == 1 @@ -98,12 +110,16 @@ def test_archive_model_artifacts(self): ) def test_start_torchserve(self): - cmd = f"torchserve --start --ncs --models {MODEL_NAME}_str.mar,{MODEL_NAME}_dict.mar --model-store {MODEL_STORE_DIR} --enable-model-api --disable-token-auth" - subprocess.run( - cmd, - shell=True, - check=True, - ) + command = [ + "torchserve", + "--start", + "--ncs", + "--models", f"{MODEL_NAME}_str.mar,{MODEL_NAME}_dict.mar", + "--model-store", MODEL_STORE_DIR, + "--enable-model-api", + "--disable-token-auth" + ] + subprocess.run(command, check=True) time.sleep(10) assert len(glob.glob("logs/access_log.log")) == 1 assert 
len(glob.glob("logs/model_log.log")) == 1 @@ -114,12 +130,7 @@ def test_start_torchserve(self): reason="Test to be run outside docker", ) def test_server_status(self): - result = subprocess.run( - "curl http://localhost:8080/ping", - shell=True, - capture_output=True, - check=True, - ) + result = subprocess.run(["curl", "http://localhost:8080/ping"], capture_output=True, check=True) expected_server_status_str = '{"status": "Healthy"}' expected_server_status = json.loads(expected_server_status_str) assert json.loads(result.stdout) == expected_server_status @@ -129,12 +140,7 @@ def test_server_status(self): reason="Test to be run outside docker", ) def test_registered_model(self): - result = subprocess.run( - "curl http://localhost:8081/models", - shell=True, - capture_output=True, - check=True, - ) + result = subprocess.run(["curl", "http://localhost:8081/models"], capture_output=True, check=True) def _response_to_tuples(response_str): models = json.loads(response_str)["models"] @@ -155,13 +161,15 @@ def test_serve_inference(self): request_json = json.dumps(request_data) for model_name in [f"{MODEL_NAME}_str", f"{MODEL_NAME}_dict"]: - result = subprocess.run( - f"curl -s -X POST -H \"Content-Type: application/json;\" http://localhost:8080/predictions/{model_name} -d '{request_json}'", - shell=True, - capture_output=True, - check=True, - ) - + command = [ + "curl", + "-s", + "-X", "POST", + "-H", "Content-Type: application/json", + f"http://localhost:8080/predictions/{model_name}", + "-d", request_json + ] + result = subprocess.run(command, capture_output=True, check=True) string_result = result.stdout.decode("utf-8") float_result = float(string_result) expected_result = 3.5 diff --git a/test/pytest/test_torch_xla.py b/test/pytest/test_torch_xla.py index 8a1075c293..b26f7c8f9c 100644 --- a/test/pytest/test_torch_xla.py +++ b/test/pytest/test_torch_xla.py @@ -35,7 +35,7 @@ @pytest.mark.skipif(TORCHXLA_AVAILABLE == False, reason="PyTorch/XLA is not installed") class 
TestTorchXLA: def teardown_class(self): - subprocess.run("torchserve --stop", shell=True, check=True) + subprocess.run(["torchserve", "--stop"], check=True) time.sleep(10) def test_archive_model_artifacts(self): @@ -43,58 +43,67 @@ def test_archive_model_artifacts(self): assert len(glob.glob(YAML_CONFIG)) == 1 assert len(glob.glob(CONFIG_PROPERTIES)) == 1 subprocess.run( - f"cd {TORCH_XLA_TEST_DATA_DIR} && python model.py", shell=True, check=True + ["python", "model.py"], + cwd=TORCH_XLA_TEST_DATA_DIR, + check=True ) - subprocess.run(f"mkdir -p {MODEL_STORE_DIR}", shell=True, check=True) + os.makedirs(MODEL_STORE_DIR, exist_ok=True) subprocess.run( - f"torch-model-archiver --model-name {MODEL_NAME} --version 1.0 --model-file {MODEL_FILE} --serialized-file {SERIALIZED_FILE} --config-file {YAML_CONFIG} --export-path {MODEL_STORE_DIR} --handler base_handler -f", - shell=True, - check=True, + [ + "torch-model-archiver", + "--model-name", MODEL_NAME, + "--version", "1.0", + "--model-file", MODEL_FILE, + "--serialized-file", SERIALIZED_FILE, + "--config-file", YAML_CONFIG, + "--export-path", MODEL_STORE_DIR, + "--handler", "base_handler", + "-f" + ], + check=True ) assert len(glob.glob(SERIALIZED_FILE)) == 1 assert len(glob.glob(os.path.join(MODEL_STORE_DIR, f"{MODEL_NAME}.mar"))) == 1 def test_start_torchserve(self): - subprocess.run( - f"torchserve --start --ncs --models {MODEL_NAME}.mar --model-store {MODEL_STORE_DIR} --ts-config {CONFIG_PROPERTIES}", - shell=True, - check=True, - ) + command = [ + "torchserve", + "--start", + "--ncs", + "--models", f"{MODEL_NAME}.mar", + "--model-store", MODEL_STORE_DIR, + "--ts-config", CONFIG_PROPERTIES + ] + subprocess.run(command, check=True) time.sleep(10) assert len(glob.glob("logs/access_log.log")) == 1 assert len(glob.glob("logs/model_log.log")) == 1 assert len(glob.glob("logs/ts_log.log")) == 1 def test_server_status(self): - result = subprocess.run( - "curl http://localhost:8080/ping", - shell=True, - capture_output=True, - 
check=True, - ) + result = subprocess.run(["curl", "http://localhost:8080/ping"], capture_output=True, check=True) expected_server_status_str = '{"status": "Healthy"}' expected_server_status = json.loads(expected_server_status_str) assert json.loads(result.stdout) == expected_server_status def test_registered_model(self): - result = subprocess.run( - "curl http://localhost:8081/models", - shell=True, - capture_output=True, - check=True, - ) + result = subprocess.run(["curl", "http://localhost:8081/models"], capture_output=True, check=True) expected_registered_model_str = '{"models": [{"modelName": "half_plus_two", "modelUrl": "half_plus_two.mar"}]}' expected_registered_model = json.loads(expected_registered_model_str) assert json.loads(result.stdout) == expected_registered_model def test_serve_inference(self): - request = "'{\"" 'instances"' ": [[1.0], [2.0], [3.0]]}'" - result = subprocess.run( - f'curl -s -X POST -H "Content-Type: application/json;" http://localhost:8080/predictions/half_plus_two -d {request}', - shell=True, - capture_output=True, - check=True, - ) + request = {"instances": [[1.0], [2.0], [3.0]]} + request_json = json.dumps(request) + command = [ + "curl", + "-s", + "-X", "POST", + "-H", "Content-Type: application/json", + "http://localhost:8080/predictions/half_plus_two", + "-d", request_json + ] + result = subprocess.run(command, capture_output=True, check=True) expected_result_str = '{"predictions": [[2.5], [3.0], [3.5]]}' expected_result = json.loads(expected_result_str) assert json.loads(result.stdout) == expected_result diff --git a/test/pytest/test_utils.py b/test/pytest/test_utils.py index 059a2c903d..3e2ea8f6e6 100644 --- a/test/pytest/test_utils.py +++ b/test/pytest/test_utils.py @@ -7,7 +7,7 @@ import tempfile from os import path from pathlib import Path - +import shlex import orjson import requests @@ -167,10 +167,10 @@ def create_model_artifacts(items: dict, force=False, export_path=None) -> str: 
requirements_file=items.get("requirements_file"), export_path=export_path, ) - + cmd = shlex.split(cmd) print(f"## In directory: {os.getcwd()} | Executing command: {cmd}\n") try: - subprocess.check_call(cmd, shell=True) + subprocess.check_call(cmd) if str(items.get("archive_format")) == "no-archive": model_artifacts = "{0}".format(items.get("model_name")) elif str(items.get("archive_format")) == "tgz": diff --git a/ts_scripts/marsgen.py b/ts_scripts/marsgen.py index 4c7fb4a8a8..f15c6b7f02 100644 --- a/ts_scripts/marsgen.py +++ b/ts_scripts/marsgen.py @@ -1,6 +1,7 @@ import argparse import json import os +import shlex import shutil import subprocess import sys @@ -87,8 +88,9 @@ def generate_model(model, model_store_dir): export_path, ) print(f"## In directory: {os.getcwd()} | Executing command: {cmd}\n") + cmd = shlex.split(cmd) try: - subprocess.check_call(cmd, shell=True) + subprocess.check_call(cmd) marfile = "{}.mar".format(model["model_name"]) print("## {} is generated.\n".format(marfile)) mar_set.add(marfile) diff --git a/ts_scripts/print_env_info.py b/ts_scripts/print_env_info.py index 0e74a61661..da90355ae1 100644 --- a/ts_scripts/print_env_info.py +++ b/ts_scripts/print_env_info.py @@ -3,6 +3,7 @@ import locale import os import re +import shlex import subprocess import sys @@ -56,8 +57,10 @@ def get_nvidia_smi(): def run(command): """Returns (return-code, stdout, stderr)""" + if isinstance(command, str): + command = shlex.split(command) p = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) output, err = p.communicate() rc = p.returncode diff --git a/ts_scripts/utils.py b/ts_scripts/utils.py index de755b8d2d..0566edcb1d 100644 --- a/ts_scripts/utils.py +++ b/ts_scripts/utils.py @@ -1,5 +1,6 @@ import os import platform +import shlex import subprocess import sys @@ -48,8 +49,10 @@ def check_ts_version(): def try_and_handle(cmd, dry_run=False): if dry_run: 
print(f"Executing command: {cmd}") else: + if isinstance(cmd, str): + cmd = shlex.split(cmd) try: - subprocess.run([cmd], shell=True, check=True) + subprocess.run(cmd, check=True) except subprocess.CalledProcessError as e: raise (e) diff --git a/workflow-archiver/workflow_archiver/tests/integ_tests/test_integration_workflow_archiver.py b/workflow-archiver/workflow_archiver/tests/integ_tests/test_integration_workflow_archiver.py index 4b38615145..5b18352f72 100644 --- a/workflow-archiver/workflow_archiver/tests/integ_tests/test_integration_workflow_archiver.py +++ b/workflow-archiver/workflow_archiver/tests/integ_tests/test_integration_workflow_archiver.py @@ -2,6 +2,7 @@ import errno import json import os +import shlex import shutil import subprocess import workflow_archiver @@ -31,9 +32,11 @@ def delete_file_path(path): def run_test(test, cmd): it = test.get("iterations") if test.get("iterations") is not None else 1 + if isinstance(cmd, str): + cmd = shlex.split(cmd) for i in range(it): try: - subprocess.check_call(cmd, shell=True) + subprocess.check_call(cmd) except subprocess.CalledProcessError as exc: if test.get("expect-error") is not True: assert 0, "{}".format(exc.output)