From 3e33d83318e27d404d332dcb6010a8febdb98529 Mon Sep 17 00:00:00 2001 From: Daniel Mil Date: Fri, 6 Oct 2023 10:23:08 -0700 Subject: [PATCH 1/5] fix: Don't crash on decoding failure --- aws_lambda_builders/utils.py | 27 ++++++++++++++++++- .../workflows/dotnet_clipackage/dotnetcli.py | 7 +++-- .../workflows/dotnet_clipackage/utils.py | 5 ++-- .../workflows/java_gradle/gradle_validator.py | 3 ++- .../workflows/java_maven/maven.py | 8 +++--- tests/unit/test_utils.py | 25 +++++++++++++++++ 6 files changed, 64 insertions(+), 11 deletions(-) diff --git a/aws_lambda_builders/utils.py b/aws_lambda_builders/utils.py index d8db688fe..6dabe05bd 100644 --- a/aws_lambda_builders/utils.py +++ b/aws_lambda_builders/utils.py @@ -1,7 +1,7 @@ """ Common utilities for the library """ - +import locale import logging import os import shutil @@ -231,3 +231,28 @@ def extract_tarfile(tarfile_path: Union[str, os.PathLike], unpack_dir: Union[str raise tarfile.ExtractError("Attempted Path Traversal in Tar File") tar.extractall(unpack_dir) + + +def decode(to_decode: bytes, encoding: Optional[str] = None) -> str: + """ + Perform a "safe" decoding of a series of bytes. If the decoding works, returns the decoded bytes. + If the decoding fails, returns an empty string instead of throwing an exception. + + Parameters + ---------- + to_decode: bytes + Series of bytes to be decoded + encoding: Optional[str] + Encoding type. If None, will attempt to find the correct encoding based on locale. + + Returns + ------- + str + Decoded string if decoding succeeds, empty string if decoding fails + """ + encoding = encoding if encoding else locale.getpreferredencoding() + try: + return to_decode.decode(encoding).strip() + except UnicodeDecodeError: + LOG.debug(f"Unable to decode bytes: {to_decode} with encoding: {encoding}") + return "" diff --git a/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py b/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py index 1cbf0fa19..5af9c21b1 100644 --- a/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py +++ b/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py @@ -2,9 +2,9 @@ Wrapper around calls to dotent CLI through a subprocess. """ -import locale import logging +from ...utils import decode from .utils import OSUtils LOG = logging.getLogger(__name__) @@ -52,7 +52,6 @@ def run(self, args, cwd=None): # DotNet output is in system locale dependent encoding # https://learn.microsoft.com/en-us/dotnet/api/system.console.outputencoding?view=net-6.0#remarks # "The default code page that the console uses is determined by the system locale." - encoding = locale.getpreferredencoding() p = self.os_utils.popen(invoke_dotnet, stdout=self.os_utils.pipe, stderr=self.os_utils.pipe, cwd=cwd) out, err = p.communicate() @@ -60,7 +59,7 @@ def run(self, args, cwd=None): # The package command contains lots of useful information on how the package was created and # information when the package command was not successful. For that reason the output is # always written to the output to help developers diagnose issues. - LOG.info(out.decode(encoding).strip()) + LOG.info(decode(out)) if p.returncode != 0: - raise DotnetCLIExecutionError(message=err.decode(encoding).strip()) + raise DotnetCLIExecutionError(message=decode(err)) diff --git a/aws_lambda_builders/workflows/dotnet_clipackage/utils.py b/aws_lambda_builders/workflows/dotnet_clipackage/utils.py index b04e8dcb5..36868e7fb 100644 --- a/aws_lambda_builders/workflows/dotnet_clipackage/utils.py +++ b/aws_lambda_builders/workflows/dotnet_clipackage/utils.py @@ -7,7 +7,7 @@ import subprocess import zipfile -from aws_lambda_builders.utils import which +from aws_lambda_builders.utils import decode, which LOG = logging.getLogger(__name__) @@ -96,7 +96,8 @@ def _extract(self, file_info, output_dir, zip_ref): if not self._is_symlink(file_info): return zip_ref.extract(file_info, output_dir) - source = zip_ref.read(file_info.filename).decode("utf8") + source = zip_ref.read(file_info.filename) + source = decode(source) link_name = os.path.normpath(os.path.join(output_dir, file_info.filename)) # make leading dirs if needed diff --git a/aws_lambda_builders/workflows/java_gradle/gradle_validator.py b/aws_lambda_builders/workflows/java_gradle/gradle_validator.py index 5a190c183..2972e75b2 100644 --- a/aws_lambda_builders/workflows/java_gradle/gradle_validator.py +++ b/aws_lambda_builders/workflows/java_gradle/gradle_validator.py @@ -5,6 +5,7 @@ import logging import re +from aws_lambda_builders.utils import decode from aws_lambda_builders.validator import RuntimeValidator from aws_lambda_builders.workflows.java.utils import OSUtils @@ -81,6 +82,6 @@ def _get_jvm_string(self, gradle_path): return None for line in stdout.splitlines(): - l_dec = line.decode() + l_dec = decode(line) if l_dec.startswith("JVM"): return l_dec diff --git a/aws_lambda_builders/workflows/java_maven/maven.py b/aws_lambda_builders/workflows/java_maven/maven.py index 5f4dc614a..dbede6a09 100644 --- a/aws_lambda_builders/workflows/java_maven/maven.py +++ b/aws_lambda_builders/workflows/java_maven/maven.py @@ -5,6 +5,8 @@ import logging import subprocess +from aws_lambda_builders.utils import decode + LOG = logging.getLogger(__name__) @@ -28,10 +30,10 @@ def build(self, scratch_dir): args = ["clean", "install"] ret_code, stdout, _ = self._run(args, scratch_dir) - LOG.debug("Maven logs: %s", stdout.decode("utf8").strip()) + LOG.debug("Maven logs: %s", decode(stdout)) if ret_code != 0: - raise MavenExecutionError(message=stdout.decode("utf8").strip()) + raise MavenExecutionError(message=decode(stdout)) def copy_dependency(self, scratch_dir): include_scope = "runtime" @@ -40,7 +42,7 @@ def copy_dependency(self, scratch_dir): ret_code, stdout, _ = self._run(args, scratch_dir) if ret_code != 0: - raise MavenExecutionError(message=stdout.decode("utf8").strip()) + raise MavenExecutionError(message=decode(stdout)) def _run(self, args, cwd=None): p = self.os_utils.popen( diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 97d2ec16f..119ccb130 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -2,6 +2,7 @@ from unittest.mock import patch from aws_lambda_builders import utils +from aws_lambda_builders.utils import decode class Test_create_symlink_or_copy(TestCase): @@ -30,3 +31,27 @@ def test_must_copy_if_symlink_fails(self, patched_copy_tree, pathced_os, patched pathced_os.symlink.assert_called_once() patched_copy_tree.assert_called_with(source_path, destination_path) + + +class TestDecode(TestCase): + def test_does_not_crash_non_utf8_encoding(self): + message = "hello\n\n ß".encode("iso-8859-1") + response = decode(message) + self.assertEqual(response, "") + + def test_is_able_to_decode_non_utf8_encoding(self): + message = "hello\n\n ß".encode("iso-8859-1") + response = decode(message, "iso-8859-1") + self.assertEqual(response, "hello\n\n ß") + + @patch("aws_lambda_builders.utils.locale") + def test_isa_able_to_decode_non_utf8_locale(self, mock_locale): + mock_locale.getpreferredencoding.return_value = "iso-8859-1" + message = "hello\n\n ß".encode("iso-8859-1") + response = decode(message) + self.assertEqual(response, "hello\n\n ß") + + def test_succeeds_with_utf8_encoding(self): + message = "hello".encode("utf-8") + response = decode(message) + self.assertEqual(response, "hello") From cad3b9eea2308c1118feaf91e1e4e18f58af7970 Mon Sep 17 00:00:00 2001 From: Daniel Mil Date: Fri, 6 Oct 2023 10:26:41 -0700 Subject: [PATCH 2/5] Format files --- aws_lambda_builders/workflows/dotnet_clipackage/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aws_lambda_builders/workflows/dotnet_clipackage/utils.py b/aws_lambda_builders/workflows/dotnet_clipackage/utils.py index 36868e7fb..a1c0608f3 100644 --- a/aws_lambda_builders/workflows/dotnet_clipackage/utils.py +++ b/aws_lambda_builders/workflows/dotnet_clipackage/utils.py @@ -96,8 +96,7 @@ def _extract(self, file_info, output_dir, zip_ref): if not self._is_symlink(file_info): return zip_ref.extract(file_info, output_dir) - source = zip_ref.read(file_info.filename) - source = decode(source) + source = decode(zip_ref.read(file_info.filename)) link_name = os.path.normpath(os.path.join(output_dir, file_info.filename)) # make leading dirs if needed From 1ef97cb5c5f7e7945f247708733a2efcbc4c4159 Mon Sep 17 00:00:00 2001 From: Daniel Mil Date: Tue, 10 Oct 2023 09:46:05 -0700 Subject: [PATCH 3/5] Fix Windows test failure --- tests/unit/test_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 119ccb130..28d334e58 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,3 +1,5 @@ +import platform + from unittest import TestCase from unittest.mock import patch @@ -36,8 +38,9 @@ def test_must_copy_if_symlink_fails(self, patched_copy_tree, pathced_os, patched class TestDecode(TestCase): def test_does_not_crash_non_utf8_encoding(self): message = "hello\n\n ß".encode("iso-8859-1") + expected_message = "hello\n\n ß" if platform.system().lower() == "windows" else "" response = decode(message) - self.assertEqual(response, "") + self.assertEqual(response, expected_message) def test_is_able_to_decode_non_utf8_encoding(self): message = "hello\n\n ß".encode("iso-8859-1") From 91841b57cc51284235b71539aad7c6b7b0346ef3 Mon Sep 17 00:00:00 2001 From: Daniel Mil Date: Tue, 10 Oct 2023 10:50:42 -0700 Subject: [PATCH 4/5] Add comment for test case --- tests/unit/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 28d334e58..ab703765c 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -38,6 +38,7 @@ def test_must_copy_if_symlink_fails(self, patched_copy_tree, pathced_os, patched class TestDecode(TestCase): def test_does_not_crash_non_utf8_encoding(self): message = "hello\n\n ß".encode("iso-8859-1") + # Windows will decode this string as expected, *nix systems won't expected_message = "hello\n\n ß" if platform.system().lower() == "windows" else "" response = decode(message) self.assertEqual(response, expected_message) From 966ec917f1c00783767e9683df97953087b9f945 Mon Sep 17 00:00:00 2001 From: Daniel Mil Date: Tue, 10 Oct 2023 13:25:59 -0700 Subject: [PATCH 5/5] Update paths to be absolute --- aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py b/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py index 5af9c21b1..f78a9b927 100644 --- a/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py +++ b/aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py @@ -4,8 +4,8 @@ import logging -from ...utils import decode -from .utils import OSUtils +from aws_lambda_builders.utils import decode +from aws_lambda_builders.workflows.dotnet_clipackage.utils import OSUtils LOG = logging.getLogger(__name__)