Skip to content

Commit

Permalink
Add Multi-Architecture Support (x86_64, arm64) for Benchmarks (#227)
Browse files Browse the repository at this point in the history
* [Feat] Added architecture parameter in config file [x86_64 or arm64]

* [Fix] Change python version to 3.7

* [Feat] Changed python installer to check for architecture and install the package accordingly.

* [Feat] Get architecture and pass that as a parameter while creating lambda function.

* [Feat] Pass architecture parameter while running the docker container.

* [Feat] Add architecture in config deserializer.

* [Feat] Add architecture support.

* [Feat] Add requirement for arm for 210.thumbnailer

* [Feat] Add requirement for arm for 501.graph-pagerank

* [Feat] Add requirement for arm for 502.graph-mst

* [Feat] Add requirement for arm for 503.graph-bfs

* [Feat] Add requirement for arm for 504.dna-visualisation

* [Feat] Add architecture support build for ffmpeg for 220.video-processing

* Fail early if the architecture is not supported on the platform.

* Add Architecture parameter when installing npm packages

* Testing NodeJS ( Multi architecture support)

* Upgrade version of sharp, as the previous 0.28 version didn't have the prebuilt arm libraries

* Add appropriate version for python3.9 arm

* [dev] Linting

* [Feat] Fix order of parameters

* [Feat] Support AWS arm64 architecture also when updating functions

* [Feat] Add architecture to function name on AWS

* [Feat] Separate cached code packages and output directories based on architecture

* [Feat] Rename x86_64 to a simpler x64

* [Feat] Fix incorrect cache creation

* [system] Fix major bug in cache handling - we failed to update size of new code package

* [Feat] Adapt regression system to work with regression

* [Feat] Add architecture to code packages uploaded to cloud storage

* [dev] Linting

* [Feat] Fix paths to code packages in S3

* [Feat] Support Python 3.10 on ARM

* [Feat] Fix build of 504.dna-visualisation on ARM where one package blows up code package size

* [aws] Implement backoff for AWS API that has a low limit on the number of concurrent requests

* [Feat] Expand docs

---------

Co-authored-by: prajinkhadka <prawjeenkhadka@gmail.com>
  • Loading branch information
mcopik and prajinkhadka authored Nov 6, 2024
1 parent 02ec0ba commit 4f7c5dd
Show file tree
Hide file tree
Showing 29 changed files with 345 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
"author": "",
"license": "",
"dependencies": {
"sharp": "^0.28"
"sharp": "^0.32"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pillow==10.0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pillow==10.0.0
16 changes: 13 additions & 3 deletions benchmarks/200.multimedia/220.video-processing/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,23 @@

DIR=$1
VERBOSE=$2
TARGET_ARCHITECTURE=$3

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
wget -q https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -P ${DIR}

if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then
FFMPEG_URL="https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-arm64-static.tar.xz"
else
FFMPEG_URL="https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz"
fi

wget -q ${FFMPEG_URL} -P ${DIR}

pushd ${DIR} >/dev/null
tar -xf ffmpeg-release-amd64-static.tar.xz
tar -xf ffmpeg-release-*-static.tar.xz
rm *.tar.xz
mv ffmpeg-* ffmpeg
rm ffmpeg/ffprobe
rm -f ffmpeg/ffprobe
# make the binary executable
chmod 755 ffmpeg/ffmpeg
popd >/dev/null
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-igraph==0.11.4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-igraph==0.11.4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-igraph==0.11.4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-igraph==0.11.4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-igraph==0.11.4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-igraph==0.11.4
21 changes: 21 additions & 0 deletions benchmarks/500.scientific/504.dna-visualisation/python/package.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Some dependencies have broken wheels.
# For example, the pandas wheel for Python 3.10 on arm ships shared libraries
# for all Linux versions and all Python versions.
# This is too much for Lambda, so we strip the libraries we do not need.
#
# Usage: package.sh <package-dir>
# Environment: PYTHON_VERSION - target Python version, e.g. "3.10".

PACKAGE_DIR=$1
if [ -z "${PACKAGE_DIR}" ] || [ ! -d "${PACKAGE_DIR}" ]; then
    # Refuse to continue: without a valid directory the deletions below
    # would run against whatever the current working directory happens to be.
    echo "Package directory '${PACKAGE_DIR}' does not exist" >&2
    exit 1
fi
echo "Original size $(du -sh "${PACKAGE_DIR}" | cut -f1)"

# remove libraries for musl
# The subshell keeps the caller's working directory untouched; `-exec ... {} +`
# is a no-op when nothing matches and is safe for paths containing whitespace.
(cd "${PACKAGE_DIR}" && find . -name "*aarch64-linux-musl.so" -exec rm -f -- {} +)

# "3.10" -> "310", the form embedded in the names of the shared libraries
version=$(echo "${PYTHON_VERSION}" | sed 's/\.//g')
echo "versions ${PYTHON_VERSION} ${version}"
# remove libraries for other Python versions
if [ -n "${version}" ]; then
    # Keep every library whose relative path mentions the target version.
    (cd "${PACKAGE_DIR}" && find . -name "*aarch64-linux-gnu.so" ! -path "*${version}*" -exec rm -f -- {} +)
else
    # Without a version we cannot tell which libraries are needed.
    echo "PYTHON_VERSION is not set; keeping libraries for all Python versions" >&2
fi

echo "Stripped size $(du -sh "${PACKAGE_DIR}" | cut -f1)"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
squiggle==0.3.1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
squiggle==0.3.1
7 changes: 4 additions & 3 deletions config/example.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
"deployment": "openwhisk",
"update_code": false,
"update_storage": false,
"download_results": false,
"download_results": false,
"architecture": "arm64",
"runtime": {
"language": "python",
"version": "3.7"
"language": "nodejs",
"version": "16"
},
"type": "invocation-overhead",
"perf-cost": {
Expand Down
15 changes: 10 additions & 5 deletions config/systems.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
"packages": []
}
}
}
},
"architecture": ["x64"]
},
"aws": {
"languages": {
Expand Down Expand Up @@ -93,7 +94,8 @@
}
}
}
}
},
"architecture": ["x64", "arm64"]
},
"azure": {
"languages": {
Expand Down Expand Up @@ -145,7 +147,8 @@
"manage": {
"username": "docker_user"
}
}
},
"architecture": ["x64"]
},
"gcp": {
"languages": {
Expand Down Expand Up @@ -196,7 +199,8 @@
}
}
}
}
},
"architecture": ["x64"]
},
"openwhisk": {
"languages": {
Expand Down Expand Up @@ -244,6 +248,7 @@
}
}
}
}
},
"architecture": ["x64"]
}
}
16 changes: 14 additions & 2 deletions dockerfiles/nodejs_installer.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
#!/bin/bash

if [ -f $FILE ]; then
if [ -f /nvm/nvm.sh ]; then
. /nvm/nvm.sh
fi
cd /mnt/function && npm install && rm -rf package-lock.json
#
cd /mnt/function

if [ "${TARGET_ARCHITECTURE}" == "arm64" ]; then
npm install --arch=arm64
elif [ "${TARGET_ARCHITECTURE}" = "x64" ]; then
npm install --arch=x64
else
echo "Unsupported architecture: $TARGET_ARCHITECTURE"
exit 1
fi

rm -rf package-lock.json
29 changes: 26 additions & 3 deletions dockerfiles/python_installer.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
#!/bin/bash

cd /mnt/function\
&& if test -f "requirements.txt.${PYTHON_VERSION}"; then pip3 -q install -r requirements.txt -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages ; else pip3 -q install -r requirements.txt -t .python_packages/lib/site-packages ; fi\
&& if test -f "${SCRIPT_FILE}"; then /bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages ; fi
cd /mnt/function

PLATFORM_ARG=""
if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then
PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"
fi

if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && [[ -f "requirements.txt.arm.${PYTHON_VERSION}" ]]; then

pip3 -q install ${PLATFORM_ARG} -r requirements.txt.arm.${PYTHON_VERSION} -t .python_packages/lib/site-packages

elif [[ -f "requirements.txt.${PYTHON_VERSION}" ]]; then

pip3 -q install ${PLATFORM_ARG} -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages

else

pip3 -q install ${PLATFORM_ARG} -r requirements.txt -t .python_packages/lib/site-packages

fi

if [[ -f "${SCRIPT_FILE}" ]]; then
/bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages
fi


27 changes: 15 additions & 12 deletions docs/benchmarks.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@

## Benchmark Applications

| Type | Benchmark | Languages | Description |
| :--- | :---: | :---: | :---: |
| Webapps | 110.dynamic-html | Python, Node.js | Generate dynamic HTML from a template. |
| Webapps | 120.uploader | Python, Node.js | Upload a file from the provided URL to cloud storage. |
| Multimedia | 210.thumbnailer | Python, Node.js | Generate a thumbnail of an image. |
| Multimedia | 220.video-processing | Python | Add a watermark and generate gif of a video file. |
| Utilities | 311.compression | Python | Create a .zip file for a group of files in storage and return to user to download. |
| Inference | 411.image-recognition | Python | Image recognition with ResNet and pytorch. |
| Scientific | 501.graph-pagerank | Python | PageRank implementation with igraph. |
| Scientific | 502.graph-mst | Python | Minimum spanning tree (MST) implementation with igraph. |
| Scientific | 503.graph-bfs | Python | Breadth-first search (BFS) implementation with igraph. |
| Scientific | 504.dna-visualisation | Python | Creates visualization data for a DNA sequence. |
| Type | Benchmark | Languages | Architecture | Description |
| :--- | :---: | :---: | :---: | :---: |
| Webapps | 110.dynamic-html | Python, Node.js | x64, arm64 | Generate dynamic HTML from a template. |
| Webapps | 120.uploader | Python, Node.js | x64, arm64 | Upload a file from the provided URL to cloud storage. |
| Multimedia | 210.thumbnailer | Python, Node.js | x64, arm64 | Generate a thumbnail of an image. |
| Multimedia | 220.video-processing | Python | x64, arm64 | Add a watermark and generate gif of a video file. |
| Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. |
| Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. |
| Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. |
| Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. |
| Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. |
| Scientific | 504.dna-visualisation | Python | x64, arm64 | Creates visualization data for a DNA sequence. |

Below, we discuss the most important implementation details of each benchmark. For more details on benchmark selection and their characterization, please refer to [our paper](../README.md#publication).

> [!NOTE]
> Benchmarks whose number starts with the digit 0, such as `020.server-reply` are internal microbenchmarks used by specific experiments. They are not intended to be directly invoked by users.
> [!NOTE]
> ARM architecture is supported on AWS Lambda only.
> [!WARNING]
> Benchmark 411.image-recognition contains PyTorch which is often too large to fit into a code package. Up to Python 3.7, we can directly ship the dependencies. For Python 3.8, we use an additional zipping step that requires additional setup during the first run, making cold invocations slower. Warm invocations are not affected.
Expand Down
13 changes: 11 additions & 2 deletions sebs.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,19 @@ def common_params(func):
type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk"]),
help="Cloud deployment to use.",
)
@click.option(
"--architecture",
default=None,
type=click.Choice(["x64", "arm64"]),
help="Target architecture",
)
@click.option(
"--resource-prefix",
default=None,
type=str,
help="Resource prefix to look for.",
)

@simplified_common_params
@functools.wraps(func)
def wrapper(*args, **kwargs):
Expand All @@ -117,6 +124,7 @@ def parse_common_params(
deployment,
language,
language_version,
architecture,
resource_prefix: Optional[str] = None,
initialize_deployment: bool = True,
ignore_cache: bool = False,
Expand All @@ -139,6 +147,7 @@ def parse_common_params(
update_nested_dict(config_obj, ["deployment", "name"], deployment)
update_nested_dict(config_obj, ["experiments", "update_code"], update_code)
update_nested_dict(config_obj, ["experiments", "update_storage"], update_storage)
update_nested_dict(config_obj, ["experiments", "architecture"], architecture)

# set the path the configuration was loaded from
update_nested_dict(config_obj, ["deployment", "local", "path"], config)
Expand All @@ -149,7 +158,7 @@ def parse_common_params(

if initialize_deployment:
deployment_client = sebs_client.get_deployment(
config_obj["deployment"], logging_filename=logging_filename
config_obj, logging_filename=logging_filename
)
deployment_client.initialize(resource_prefix=resource_prefix)
else:
Expand Down Expand Up @@ -342,7 +351,7 @@ def regression(benchmark_input_size, benchmark_name, **kwargs):
sebs_client,
config["experiments"],
set((config["deployment"]["name"],)),
config["deployment"],
config,
benchmark_name,
)

Expand Down
43 changes: 35 additions & 8 deletions sebs/aws/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ def package_code(

return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size

def _map_architecture(self, architecture: str) -> str:

if architecture == "x64":
return "x86_64"
return architecture

def _map_language_runtime(self, language: str, runtime: str):

# AWS uses different naming scheme for Node.js versions
Expand All @@ -180,7 +186,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun
func_name = AWS.format_function_name(func_name)
storage_client = self.get_storage()
function_cfg = FunctionConfig.from_benchmark(code_package)

architecture = function_cfg.architecture.value
# we can either check for exception or use list_functions
# there's no API for test
try:
Expand Down Expand Up @@ -217,7 +223,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun
code_package_name = cast(str, os.path.basename(package))

code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT)
code_prefix = os.path.join(benchmark, code_package_name)
code_prefix = os.path.join(benchmark, architecture, code_package_name)
storage_client.upload(code_bucket, package, code_prefix)

self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket))
Expand All @@ -232,6 +238,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun
MemorySize=memory,
Timeout=timeout,
Code=code_config,
Architectures=[self._map_architecture(architecture)],
)

lambda_function = LambdaFunction(
Expand Down Expand Up @@ -283,25 +290,42 @@ def update_function(self, function: Function, code_package: Benchmark):
name = function.name
code_size = code_package.code_size
package = code_package.code_location
benchmark = code_package.benchmark

function_cfg = FunctionConfig.from_benchmark(code_package)
architecture = function_cfg.architecture.value

# Run AWS update
# AWS Lambda limit on zip deployment
if code_size < 50 * 1024 * 1024:
with open(package, "rb") as code_body:
self.client.update_function_code(FunctionName=name, ZipFile=code_body.read())
self.client.update_function_code(
FunctionName=name,
ZipFile=code_body.read(),
Architectures=[self._map_architecture(architecture)],
)
# Upload code package to S3, then update
else:
code_package_name = os.path.basename(package)

storage = cast(S3, self.get_storage())
bucket = function.code_bucket(code_package.benchmark, storage)
storage.upload(bucket, package, code_package_name)
code_prefix = os.path.join(benchmark, architecture, code_package_name)
storage.upload(bucket, package, code_prefix)

self.client.update_function_code(
FunctionName=name, S3Bucket=bucket, S3Key=code_package_name
FunctionName=name,
S3Bucket=bucket,
S3Key=code_prefix,
Architectures=[self._map_architecture(architecture)],
)
self.wait_function_updated(function)
self.logging.info(f"Updated code of {name} function. ")
# and update config
self.client.update_function_configuration(
FunctionName=name, Timeout=function.config.timeout, MemorySize=function.config.memory
FunctionName=name,
Timeout=function.config.timeout,
MemorySize=function.config.memory,
)
self.wait_function_updated(function)
self.logging.info(f"Updated configuration of {name} function. ")
Expand All @@ -321,8 +345,11 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark
@staticmethod
def default_function_name(code_package: Benchmark) -> str:
# Create function name
func_name = "{}-{}-{}".format(
code_package.benchmark, code_package.language_name, code_package.language_version
func_name = "{}-{}-{}-{}".format(
code_package.benchmark,
code_package.language_name,
code_package.language_version,
code_package.architecture,
)
return AWS.format_function_name(func_name)

Expand Down
Loading

0 comments on commit 4f7c5dd

Please sign in to comment.