Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Decoding non-unicode characters raises unhandled exception #554

Merged
merged 5 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion aws_lambda_builders/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Common utilities for the library
"""

import locale
import logging
import os
import shutil
Expand Down Expand Up @@ -231,3 +231,28 @@ def extract_tarfile(tarfile_path: Union[str, os.PathLike], unpack_dir: Union[str
raise tarfile.ExtractError("Attempted Path Traversal in Tar File")

tar.extractall(unpack_dir)


def decode(to_decode: bytes, encoding: Optional[str] = None) -> str:
    """
    Decode a series of bytes into text without letting a decoding error escape.

    A ``UnicodeDecodeError`` raised while decoding is logged at debug level and
    swallowed; in that case an empty string is returned instead.

    Parameters
    ----------
    to_decode: bytes
        Series of bytes to be decoded
    encoding: Optional[str]
        Name of the encoding to use. When None (or empty), the encoding reported by
        ``locale.getpreferredencoding()`` is used.

    Returns
    -------
    str
        The decoded text with leading and trailing whitespace stripped, or an empty
        string if the bytes could not be decoded with the chosen encoding
    """
    if not encoding:
        # No explicit encoding requested: fall back to the locale's preferred one
        encoding = locale.getpreferredencoding()

    try:
        text = to_decode.decode(encoding)
    except UnicodeDecodeError:
        LOG.debug(f"Unable to decode bytes: {to_decode} with encoding: {encoding}")
        return ""

    return text.strip()
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
Wrapper around calls to dotnet CLI through a subprocess.
"""

import locale
import logging

from .utils import OSUtils
from aws_lambda_builders.utils import decode
from aws_lambda_builders.workflows.dotnet_clipackage.utils import OSUtils

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -52,15 +52,14 @@ def run(self, args, cwd=None):
# DotNet output is in system locale dependent encoding
# https://learn.microsoft.com/en-us/dotnet/api/system.console.outputencoding?view=net-6.0#remarks
# "The default code page that the console uses is determined by the system locale."
encoding = locale.getpreferredencoding()
p = self.os_utils.popen(invoke_dotnet, stdout=self.os_utils.pipe, stderr=self.os_utils.pipe, cwd=cwd)

out, err = p.communicate()

# The package command contains lots of useful information on how the package was created and
# information when the package command was not successful. For that reason the output is
# always written to the output to help developers diagnose issues.
LOG.info(out.decode(encoding).strip())
LOG.info(decode(out))

if p.returncode != 0:
raise DotnetCLIExecutionError(message=err.decode(encoding).strip())
raise DotnetCLIExecutionError(message=decode(err))
4 changes: 2 additions & 2 deletions aws_lambda_builders/workflows/dotnet_clipackage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess
import zipfile

from aws_lambda_builders.utils import which
from aws_lambda_builders.utils import decode, which

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -96,7 +96,7 @@ def _extract(self, file_info, output_dir, zip_ref):
if not self._is_symlink(file_info):
return zip_ref.extract(file_info, output_dir)

source = zip_ref.read(file_info.filename).decode("utf8")
source = decode(zip_ref.read(file_info.filename))
link_name = os.path.normpath(os.path.join(output_dir, file_info.filename))

# make leading dirs if needed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import re

from aws_lambda_builders.utils import decode
from aws_lambda_builders.validator import RuntimeValidator
from aws_lambda_builders.workflows.java.utils import OSUtils

Expand Down Expand Up @@ -81,6 +82,6 @@ def _get_jvm_string(self, gradle_path):
return None

for line in stdout.splitlines():
l_dec = line.decode()
l_dec = decode(line)
if l_dec.startswith("JVM"):
return l_dec
8 changes: 5 additions & 3 deletions aws_lambda_builders/workflows/java_maven/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import logging
import subprocess

from aws_lambda_builders.utils import decode

LOG = logging.getLogger(__name__)


Expand All @@ -28,10 +30,10 @@ def build(self, scratch_dir):
args = ["clean", "install"]
ret_code, stdout, _ = self._run(args, scratch_dir)

LOG.debug("Maven logs: %s", stdout.decode("utf8").strip())
LOG.debug("Maven logs: %s", decode(stdout))

if ret_code != 0:
raise MavenExecutionError(message=stdout.decode("utf8").strip())
raise MavenExecutionError(message=decode(stdout))

def copy_dependency(self, scratch_dir):
include_scope = "runtime"
Expand All @@ -40,7 +42,7 @@ def copy_dependency(self, scratch_dir):
ret_code, stdout, _ = self._run(args, scratch_dir)

if ret_code != 0:
raise MavenExecutionError(message=stdout.decode("utf8").strip())
raise MavenExecutionError(message=decode(stdout))

def _run(self, args, cwd=None):
p = self.os_utils.popen(
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import platform

from unittest import TestCase
from unittest.mock import patch

from aws_lambda_builders import utils
from aws_lambda_builders.utils import decode


class Test_create_symlink_or_copy(TestCase):
Expand Down Expand Up @@ -30,3 +33,29 @@ def test_must_copy_if_symlink_fails(self, patched_copy_tree, pathced_os, patched

pathced_os.symlink.assert_called_once()
patched_copy_tree.assert_called_with(source_path, destination_path)


class TestDecode(TestCase):
    """Unit tests for the ``decode`` helper imported from ``aws_lambda_builders.utils``."""

    @patch("aws_lambda_builders.utils.locale")
    def test_does_not_crash_non_utf8_encoding(self, mock_locale):
        # Pin the preferred encoding rather than guessing it from the operating system:
        # the outcome depends on the locale's encoding, not on the platform name, so a
        # platform check makes the test fail on hosts with an unexpected locale.
        mock_locale.getpreferredencoding.return_value = "utf-8"
        # 0xDF (ISO-8859-1 "ß") is an incomplete multi-byte sequence in UTF-8
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message)
        self.assertEqual(response, "")

    def test_is_able_to_decode_non_utf8_encoding(self):
        # An explicitly supplied encoding is used for decoding
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message, "iso-8859-1")
        self.assertEqual(response, "hello\n\n ß")

    @patch("aws_lambda_builders.utils.locale")
    def test_is_able_to_decode_non_utf8_locale(self, mock_locale):
        # With no explicit encoding, the locale's preferred encoding is used
        mock_locale.getpreferredencoding.return_value = "iso-8859-1"
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message)
        self.assertEqual(response, "hello\n\n ß")

    def test_succeeds_with_utf8_encoding(self):
        # Uses the host's real preferred encoding; the payload is plain ASCII
        message = "hello".encode("utf-8")
        response = decode(message)
        self.assertEqual(response, "hello")
Loading