diff --git a/.circleci/config.yml b/.circleci/config.yml index ce88738c..1c461ea8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ version: 2.1 orbs: - python: circleci/python@0.2.1 + python: circleci/python@1.4.0 jobs: linting: @@ -12,7 +12,11 @@ jobs: key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - run: command: | - python3 install.py --aws --azure --gcp --dont-rebuild-docker-images --no-local + sudo apt update && sudo apt install libcurl4-openssl-dev + name: Install curl-config from Ubuntu APT + - run: + command: | + python3 install.py --aws --azure --gcp --no-local name: Install pip dependencies - run: command: | @@ -40,8 +44,8 @@ jobs: then ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.6 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.10.x + docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 + docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x fi name: Load Docker images - run: diff --git a/.dockerignore b/.dockerignore index a6790432..84416f19 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,3 +6,4 @@ config cache python-venv regression-* +*_code diff --git a/.gitignore b/.gitignore index e707ac85..4caca42c 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,7 @@ dmypy.json sebs-* # cache cache + +# IntelliJ IDEA files +.idea +*.iml \ No newline at end of file diff --git a/.mypy.ini b/.mypy.ini index fece12c6..a1adeaed 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -30,5 +30,11 @@ ignore_missing_imports = True [mypy-google.api_core] ignore_missing_imports = True +[mypy-googleapiclient.discovery] +ignore_missing_imports = True + +[mypy-googleapiclient.errors] +ignore_missing_imports = True + [mypy-testtools] ignore_missing_imports = True diff --git a/README.md b/README.md index dd688775..007cd924 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -# SeBS: Serverless Benchmark Suite - -**FaaS benchmarking suite for serverless functions with automatic build, deployment, and measurements.** [![CircleCI](https://circleci.com/gh/spcl/serverless-benchmarks.svg?style=shield)](https://circleci.com/gh/spcl/serverless-benchmarks) ![Release](https://img.shields.io/github/v/release/spcl/serverless-benchmarks) @@ -8,25 +5,52 @@ ![GitHub issues](https://img.shields.io/github/issues/spcl/serverless-benchmarks) ![GitHub pull requests](https://img.shields.io/github/issues-pr/spcl/serverless-benchmarks) -SeBS is a diverse suite of FaaS benchmarks that allows an automatic performance analysis of +# SeBS: Serverless Benchmark Suite + +**FaaS benchmarking suite for serverless functions with automatic build, deployment, and measurements.** + +![Overview of SeBS features and components.](docs/overview.png) + +SeBS is a diverse suite of FaaS benchmarks that allows automatic performance analysis of commercial and open-source serverless platforms. We provide a suite of -[benchmark applications](#benchmark-applications) and [experiments](#experiments), +[benchmark applications](#benchmark-applications) and [experiments](#experiments) and use them to test and evaluate different components of FaaS systems. See the [installation instructions](#installation) to learn how to configure SeBS to use selected cloud services and [usage instructions](#usage) to automatically launch experiments in the cloud! 
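+
+As a quick orientation, a minimal session might look as follows; the flags and the
+benchmark name below are only examples (AWS is used as the target platform), and the
+individual commands are described in detail in the linked documentation:
+
+```
+# install SeBS with support for all platforms (creates the python-virtualenv directory)
+./install.py --aws --azure --gcp --openwhisk --local
+# enter the virtual environment created by the installer
+. python-virtualenv/bin/activate
+# build, deploy, and invoke a single benchmark on AWS Lambda
+./sebs.py benchmark invoke 110.dynamic-html test --config config/example.json --deployment aws --verbose
+```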
-SeBS provides support for automatic deployment and invocation of benchmarks on
-AWS Lambda, Azure Functions, Google Cloud Functions, and a custom, Docker-based local
-evaluation platform. See the [documentation on cloud providers](docs/platforms.md)
-to learn how to provide SeBS with cloud credentials.
+
+SeBS provides support for **automatic deployment** and invocation of benchmarks on
+commercial and black-box platforms
+[AWS Lambda](https://aws.amazon.com/lambda/),
+[Azure Functions](https://azure.microsoft.com/en-us/services/functions/),
+and [Google Cloud Functions](https://cloud.google.com/functions).
+Furthermore, we support the open-source platform [OpenWhisk](https://openwhisk.apache.org/)
+and offer a custom, Docker-based local evaluation platform.
+See the [documentation on cloud providers](docs/platforms.md)
+for details on configuring each platform in SeBS.
The documentation describes in detail [the design and implementation of our tool](docs/design.md), and see the [modularity](docs/modularity.md) section
to learn how SeBS can be extended with new platforms, benchmarks, and experiments.
+Find out more about our project in [a paper summary](https://mcopik.github.io/projects/sebs/).
+
+Do you have further questions not answered by our documentation?
+Did you encounter trouble installing or using SeBS?
+Or do you want to use SeBS in your work and need new features?
+Feel free to reach us through GitHub issues or by writing to .
-SeBS can be used with our Docker image `spcleth/serverless-benchmarks:latest`, or the tool
-can be [installed locally](#installation).
-### Paper
+For more information on how to configure, use, and extend SeBS, see our
+documentation:
+
+* [How to use SeBS?](docs/usage.md)
+* [Which benchmark applications are offered?](docs/benchmarks.md)
+* [Which experiments can be launched to evaluate FaaS platforms?](docs/experiments.md)
+* [How to configure serverless platforms?](docs/platforms.md)
+* [How does SeBS build and deploy functions?](docs/build.md)
+* [How is the SeBS package designed?](docs/design.md)
+* [How to extend SeBS with new benchmarks, experiments, and platforms?](docs/modularity.md)
+
+### Publication

When using SeBS, please cite our [Middleware '21 paper](https://dl.acm.org/doi/abs/10.1145/3464298.3476133).
An extended version of our paper is [available on arXiv](https://arxiv.org/abs/2012.14132), and you can
@@ -35,39 +59,28 @@ You can cite our software repository as well, using the citation button on the r
```
@inproceedings{copik2021sebs,
-  author={Marcin Copik and Grzegorz Kwasniewski and Maciej Besta and Michal Podstawski and Torsten Hoefler},
-  title={SeBS: A Serverless Benchmark Suite for Function-as-a-Service Computing},
+  author = {Copik, Marcin and Kwasniewski, Grzegorz and Besta, Maciej and Podstawski, Michal and Hoefler, Torsten},
+  title = {SeBS: A Serverless Benchmark Suite for Function-as-a-Service Computing},
  year = {2021},
+  isbn = {9781450385343},
  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3464298.3476133},
  doi = {10.1145/3464298.3476133},
  booktitle = {Proceedings of the 22nd International Middleware Conference},
+  pages = {64–78},
+  numpages = {15},
+  keywords = {benchmark, serverless, FaaS, function-as-a-service},
+  location = {Qu\'{e}bec city, Canada},
  series = {Middleware '21}
}
```
-## Benchmark Applications
-
-For details on benchmark selection and their characterization, please refer to [our paper](#paper).
- -| Type | Benchmark | Languages | Description | -| :--- | :---: | :---: | :---: | -| Webapps | 110.dynamic-html | Python, Node.js | Generate dynamic HTML from a template. | -| Webapps | 120.uploader | Python, Node.js | Uploader file from provided URL to cloud storage. | -| Multimedia | 210.thumbnailer | Python, Node.js | Generate a thumbnail of an image. | -| Multimedia | 220.video-processing | Python | Add a watermark and generate gif of a video file. | -| Utilities | 311.compression | Python | Create a .zip file for a group of files in storage and return to user to download. | -| Utilities | 504.dna-visualization | Python | Creates a visualization data for DNA sequence. | -| Inference | 411.image-recognition | Python | Image recognition with ResNet and pytorch. | -| Scientific | 501.graph-pagerank | Python | PageRank implementation with igraph. | -| Scientific | 501.graph-mst | Python | Minimum spanning tree (MST) implementation with igraph. | -| Scientific | 501.graph-bfs | Python | Breadth-first search (BFS) implementation with igraph. | - ## Installation Requirements: - Docker (at least 19) -- Python 3.6+ with: +- Python 3.7+ with: - pip - venv - `libcurl` and its headers must be available on your system to install `pycurl` @@ -78,7 +91,7 @@ Requirements: To install the benchmarks with a support for all platforms, use: ``` -./install.py --aws --azure --gcp --local +./install.py --aws --azure --gcp --openwhisk --local ``` It will create a virtual environment in `python-virtualenv`, install necessary Python @@ -92,153 +105,12 @@ virtual environment: Now you can deploy serverless experiments :-) The installation of additional platforms is controlled with the `--platform` and `--no-platform` -switches. Currently, the default behavior for `install.py` is to install only the local -environment. +switches. Currently, the default behavior for `install.py` is to install only the +local environment. **Make sure** that your Docker daemon is running and your user has sufficient permissions to use it. Otherwise you might see a lot of "Connection refused" and "Permission denied" errors when using SeBS. -To verify the correctness of installation, you can use [our regression testing](#regression). - -## Usage - -SeBS has three basic commands: `benchmark`, `experiment`, and `local`. -For each command you can pass `--verbose` flag to increase the verbosity of the output. -By default, all scripts will create a cache in directory `cache` to store code with -dependencies and information on allocated cloud resources. -Benchmarks will be rebuilt after a change in source code is detected. -To enforce redeployment of code and benchmark input please use flags `--update-code` -and `--update-storage`, respectively. -**Note:** the cache does not support updating cloud region. If you want to deploy benchmarks -to a new cloud region, then use a new cache directory. - -### Benchmark - -This command is used to build, deploy, and execute serverless benchmark in cloud. -The example below invokes the benchmark `110.dynamic-html` on AWS via the standard HTTP trigger. - -``` -./sebs.py benchmark invoke 110.dynamic-html test --config config/example.json --deployment aws --verbose -``` - -To configure your benchmark, change settings in the config file or use command-line options. -The full list is available by running `./sebs.py benchmark invoke --help`. - -### Regression - -Additionally, we provide a regression option to execute all benchmarks on a given platform. 
-The example below demonstrates how to run the regression suite with `test` input size on AWS. - -``` -./sebs.py benchmark regression test --config config/example.json --deployment aws -``` - -The regression can be executed on a single benchmark as well: - -``` -./sebs.py benchmark regression test --config config/example.json --deployment aws --benchmark-name 120.uploader -``` - -### Experiment - -This command is used to execute benchmarks described in the paper. The example below runs the experiment **perf-cost**: - -``` -./sebs.py experiment invoke perf-cost --config config/example.json --deployment aws -``` - -The configuration specifies that benchmark **110.dynamic-html** is executed 50 times, with 50 concurrent invocations, and both cold and warm invocations are recorded. - -```json -"perf-cost": { - "benchmark": "110.dynamic-html", - "experiments": ["cold", "warm"], - "input-size": "test", - "repetitions": 50, - "concurrent-invocations": 50, - "memory-sizes": [128, 256] -} -``` - -To download cloud metrics and process the invocations into a .csv file with data, run the process construct - -``` -./sebs.py experiment process perf-cost --config example.json --deployment aws -``` - -### Local - -In addition to the cloud deployment, we provide an opportunity to launch benchmarks locally with the help of [minio](https://min.io/) storage. -This allows us to conduct debugging and a local characterization of the benchmarks. - -To launch Docker containers, use the following command - this example launches benchmark `110.dynamic-html` with size `test`: - -``` -./sebs.py local start 110.dynamic-html test out.json --config config/example.json --deployments 1 -``` - -The output file `out.json` will contain the information on containers deployed and the endpoints that can be used to invoke functions: - -``` -{ - "functions": [ - { - "benchmark": "110.dynamic-html", - "hash": "5ff0657337d17b0cf6156f712f697610", - "instance_id": "e4797ae01c52ac54bfc22aece1e413130806165eea58c544b2a15c740ec7d75f", - "name": "110.dynamic-html-python-128", - "port": 9000, - "triggers": [], - "url": "172.17.0.3:9000" - } - ], - "inputs": [ - { - "random_len": 10, - "username": "testname" - } - ] -} -``` - -In our example, we can use `curl` to invoke the function with provided input: - -``` -curl 172.17.0.3:9000 --request POST --data '{"random_len": 10,"username": "testname"}' --header 'Content-Type: application/json' -``` - -To stop containers, you can use the following command: - -``` -./sebs.py local stop out.json -``` - -The stopped containers won't be automatically removed unless the option `--remove-containers` has been passed to the `start` command. - -## Experiments - -For details on experiments and methodology, please refer to [our paper](#paper). - -#### Performance & cost - -Invokes given benchmark a selected number of times, measuring the time and cost of invocations. -Supports `cold` and `warm` invocations with a selected number of concurrent invocations. -In addition, to accurately measure the overheads of Azure Function Apps, we offer `burst` and `sequential` invocation type that doesn't distinguish -between cold and warm startups. - -#### Network ping-pong - -Measures the distribution of network latency between benchmark driver and function instance. - -#### Invocation overhead - -The experiment performs the clock drift synchronization protocol to accurately measure the startup time of a function by comparing -benchmark driver and function timestamps. 
- -#### Eviction model - -Executes test functions multiple times, with varying size, memory and runtime configurations, to test for how long function instances stay alive. -The result helps to estimate the analytical models describing cold startups. -Currently supported only on AWS. +To verify the correctness of installation, you can use [our regression testing](docs/usage.md#regression). ## Authors @@ -247,4 +119,5 @@ Currently supported only on AWS. * [Nico Graf (ETH Zurich)](https://github.com/ncograf/) - contributed implementation of regression tests, bugfixes, and helped with testing and documentation. * [Kacper Janda](https://github.com/Kacpro), [Mateusz Knapik](https://github.com/maknapik), [JmmCz](https://github.com/JmmCz), AGH University of Science and Technology - contributed together Google Cloud support. * [Grzegorz Kwaśniewski (ETH Zurich)](https://github.com/gkwasniewski) - worked on the modeling experiments. +* [Paweł Żuk (University of Warsaw)](https://github.com/pmzuk) - contributed OpenWhisk support. diff --git a/benchmarks/000.microbenchmarks/010.sleep/nodejs/package.json b/benchmarks/000.microbenchmarks/010.sleep/nodejs/package.json new file mode 100644 index 00000000..967cd8b7 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/nodejs/package.json @@ -0,0 +1,9 @@ +{ + "name": "", + "version": "1.0.0", + "description": "", + "author": "", + "license": "", + "dependencies": { + } +} diff --git a/benchmarks/100.webapps/110.dynamic-html/python/requirements.txt b/benchmarks/100.webapps/110.dynamic-html/python/requirements.txt index 83e5040a..5ca56944 100644 --- a/benchmarks/100.webapps/110.dynamic-html/python/requirements.txt +++ b/benchmarks/100.webapps/110.dynamic-html/python/requirements.txt @@ -1 +1 @@ -jinja2==2.10.3 +jinja2>=2.10.3 diff --git a/benchmarks/100.webapps/120.uploader/nodejs/package.json b/benchmarks/100.webapps/120.uploader/nodejs/package.json index 6108bedf..7dcc22b1 100644 --- a/benchmarks/100.webapps/120.uploader/nodejs/package.json +++ b/benchmarks/100.webapps/120.uploader/nodejs/package.json @@ -4,8 +4,7 @@ "description": "", "author": "", "license": "", - "dependencies": {}, - "devDependencies": { + "dependencies": { "request": "^2.88.0" } } diff --git a/benchmarks/200.multimedia/210.thumbnailer/nodejs/package.json b/benchmarks/200.multimedia/210.thumbnailer/nodejs/package.json index a284651f..774a1492 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/nodejs/package.json +++ b/benchmarks/200.multimedia/210.thumbnailer/nodejs/package.json @@ -5,6 +5,6 @@ "author": "", "license": "", "dependencies": { - "sharp": "^0.23.4" + "sharp": "^0.25" } } diff --git a/benchmarks/200.multimedia/210.thumbnailer/python/requirements.txt.3.9 b/benchmarks/200.multimedia/210.thumbnailer/python/requirements.txt.3.9 new file mode 100755 index 00000000..8da721c2 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/python/requirements.txt.3.9 @@ -0,0 +1 @@ +Pillow==9.0.0 diff --git a/benchmarks/200.multimedia/220.video-processing/init.sh b/benchmarks/200.multimedia/220.video-processing/init.sh index aa1d8243..688bb178 100755 --- a/benchmarks/200.multimedia/220.video-processing/init.sh +++ b/benchmarks/200.multimedia/220.video-processing/init.sh @@ -8,6 +8,7 @@ pushd ${DIR} > /dev/null tar -xf ffmpeg-release-amd64-static.tar.xz rm *.tar.xz mv ffmpeg-* ffmpeg +rm ffmpeg/ffprobe popd > /dev/null # copy watermark diff --git a/benchmarks/200.multimedia/220.video-processing/python/requirements.txt 
b/benchmarks/200.multimedia/220.video-processing/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/400.inference/411.image-recognition/python/package.sh b/benchmarks/400.inference/411.image-recognition/python/package.sh index 1133cbac..41c07ac7 100644 --- a/benchmarks/400.inference/411.image-recognition/python/package.sh +++ b/benchmarks/400.inference/411.image-recognition/python/package.sh @@ -9,11 +9,11 @@ cd $1 rm -rf external find . -type d -name "tests" -exec rm -rf {} + find . -type d -name "test" -exec rm -rf {} + -find . -type d -name "bin" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + # cleaning -find -name "*.so" -not -path "*/PIL/*" | xargs strip -find -name "*.so.*" -not -path "*/PIL/*" | xargs strip +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" | xargs strip rm -r pip > /dev/null rm -r pip-* > /dev/null diff --git a/benchmarks/400.inference/411.image-recognition/python/requirements.txt b/benchmarks/400.inference/411.image-recognition/python/requirements.txt index 0deb86c1..d191dc6d 100644 --- a/benchmarks/400.inference/411.image-recognition/python/requirements.txt +++ b/benchmarks/400.inference/411.image-recognition/python/requirements.txt @@ -2,6 +2,3 @@ #torchvision==0.4.0+cpu #https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl #torch==1.0.1.post2+cpu -Pillow==6.1 -torchvision==0.2.1 -numpy==1.16 diff --git a/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.6 b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.6 index 5f270c60..63409aca 100644 --- a/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.6 +++ b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.6 @@ -1 +1,4 @@ +Pillow==6.1 +numpy==1.16 https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl +torchvision==0.2.1 diff --git a/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.7 b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.7 index 440811d5..54bddbd5 100644 --- a/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.7 +++ b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.7 @@ -1 +1,4 @@ +Pillow==6.1 +numpy==1.16 https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl +torchvision==0.2.1 diff --git a/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.8 b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.8 new file mode 100644 index 00000000..7d543dd8 --- /dev/null +++ b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.8 @@ -0,0 +1,3 @@ +numpy==1.16 +https://download.pytorch.org/whl/cpu/torch-1.4.0%2Bcpu-cp38-cp38-linux_x86_64.whl +torchvision==0.5 diff --git a/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.9 b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.9 new file mode 100644 index 00000000..fcf863e9 --- /dev/null +++ b/benchmarks/400.inference/411.image-recognition/python/requirements.txt.3.9 @@ -0,0 +1,3 @@ +numpy==1.18 +https://download.pytorch.org/whl/cpu/torch-1.8.0%2Bcpu-cp39-cp39-linux_x86_64.whl +torchvision==0.9.0 diff --git a/benchmarks/wrappers/openwhisk/nodejs/index.js b/benchmarks/wrappers/openwhisk/nodejs/index.js new file 
mode 100644 index 00000000..1cea01df --- /dev/null +++ b/benchmarks/wrappers/openwhisk/nodejs/index.js @@ -0,0 +1,36 @@ +const path = require('path'), fs = require('fs'); + +async function main(args) { + + var minio_args = ["MINIO_STORAGE_CONNECTION_URL", "MINIO_STORAGE_ACCESS_KEY", "MINIO_STORAGE_SECRET_KEY"]; + minio_args.forEach(function(arg){ + process.env[arg] = args[arg]; + delete args[arg]; + }); + + var func = require('/function/function.js'); + var begin = Date.now() / 1000; + var start = process.hrtime(); + var ret = await func.handler(args); + var elapsed = process.hrtime(start); + var end = Date.now() / 1000; + var micro = elapsed[1] / 1e3 + elapsed[0] * 1e6; + var is_cold = false; + var fname = path.join('/tmp', 'cold_run'); + if (!fs.existsSync(fname)) { + is_cold = true; + fs.closeSync(fs.openSync(fname, 'w')); + } + + return { + begin: begin, + end: end, + compute_time: micro, + results_time: 0, + result: ret, + request_id: process.env.__OW_ACTIVATION_ID, + is_cold: is_cold, + }; +} + +exports.main = main; diff --git a/benchmarks/wrappers/openwhisk/nodejs/storage.js b/benchmarks/wrappers/openwhisk/nodejs/storage.js new file mode 100644 index 00000000..1a57123c --- /dev/null +++ b/benchmarks/wrappers/openwhisk/nodejs/storage.js @@ -0,0 +1,62 @@ + +const minio = require('minio'), + uuid = require('uuid'), + util = require('util'), + stream = require('stream'), + fs = require('fs'); + +class minio_storage { + + constructor() { + let address = process.env.MINIO_STORAGE_CONNECTION_URL; + let access_key = process.env.MINIO_STORAGE_ACCESS_KEY; + let secret_key = process.env.MINIO_STORAGE_SECRET_KEY; + + this.client = new minio.Client( + { + endPoint: address.split(':')[0], + port: parseInt(address.split(':')[1], 10), + accessKey: access_key, + secretKey: secret_key, + useSSL: false + } + ); + } + + unique_name(file) { + let [name, extension] = file.split('.'); + let uuid_name = uuid.v4().split('-')[0]; + return util.format('%s.%s.%s', name, uuid_name, extension); + } + + upload(bucket, file, filepath) { + let uniqueName = this.unique_name(file); + return [uniqueName, this.client.fPutObject(bucket, uniqueName, filepath)]; + }; + + download(bucket, file, filepath) { + return this.client.fGetObject(bucket, file, filepath); + }; + + uploadStream(bucket, file) { + var write_stream = new stream.PassThrough(); + let uniqueName = this.unique_name(file); + let promise = this.client.putObject(bucket, uniqueName, write_stream, write_stream.size); + return [write_stream, promise, uniqueName]; + }; + + downloadStream(bucket, file) { + var read_stream = new stream.PassThrough(); + return this.client.getObject(bucket, file); + }; + + static get_instance() { + if(!this.instance) { + this.instance = new storage(); + } + return this.instance; + } + + +}; +exports.storage = minio_storage; diff --git a/benchmarks/wrappers/openwhisk/python/__main__.py b/benchmarks/wrappers/openwhisk/python/__main__.py new file mode 100644 index 00000000..51e6db4d --- /dev/null +++ b/benchmarks/wrappers/openwhisk/python/__main__.py @@ -0,0 +1,40 @@ +import logging +import datetime +import os + + +def main(args): + logging.getLogger().setLevel(logging.INFO) + begin = datetime.datetime.now() + args['request-id'] = os.getenv('__OW_ACTIVATION_ID') + args['income-timestamp'] = begin.timestamp() + + for arg in ["MINIO_STORAGE_CONNECTION_URL", "MINIO_STORAGE_ACCESS_KEY", "MINIO_STORAGE_SECRET_KEY"]: + os.environ[arg] = args[arg] + del args[arg] + + from function import function + ret = function.handler(args) + + end = 
datetime.datetime.now() + logging.info("Function result: {}".format(ret)) + log_data = {"result": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + + results_time = (end - begin) / datetime.timedelta(microseconds=1) + + is_cold = False + fname = "cold_run" + if not os.path.exists(fname): + is_cold = True + open(fname, "a").close() + + return { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "request_id": os.getenv('__OW_ACTIVATION_ID'), + "results_time": results_time, + "is_cold": is_cold, + "result": log_data, + } diff --git a/benchmarks/wrappers/openwhisk/python/setup.py b/benchmarks/wrappers/openwhisk/python/setup.py new file mode 100644 index 00000000..b942d059 --- /dev/null +++ b/benchmarks/wrappers/openwhisk/python/setup.py @@ -0,0 +1,14 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) \ No newline at end of file diff --git a/benchmarks/wrappers/openwhisk/python/storage.py b/benchmarks/wrappers/openwhisk/python/storage.py new file mode 100644 index 00000000..920a4807 --- /dev/null +++ b/benchmarks/wrappers/openwhisk/python/storage.py @@ -0,0 +1,77 @@ +import os +import uuid +import json +import minio +import logging + + +class storage: + instance = None + client = None + + def __init__(self): + try: + """ + Minio does not allow another way of configuring timeout for connection. + The rest of configuration is copied from source code of Minio. + """ + import urllib3 + from datetime import timedelta + + timeout = timedelta(seconds=1).seconds + + mgr = urllib3.PoolManager( + timeout=urllib3.util.Timeout(connect=timeout, read=timeout), + maxsize=10, + retries=urllib3.Retry( + total=5, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504] + ) + ) + self.client = minio.Minio( + os.getenv("MINIO_STORAGE_CONNECTION_URL"), + access_key=os.getenv("MINIO_STORAGE_ACCESS_KEY"), + secret_key=os.getenv("MINIO_STORAGE_SECRET_KEY"), + secure=False, + http_client=mgr + ) + except Exception as e: + logging.info(e) + raise e + + @staticmethod + def unique_name(name): + name, extension = name.split(".") + return "{name}.{random}.{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + self.client.fput_object(bucket, key_name, filepath) + return key_name + + def download(self, bucket, file, filepath): + self.client.fget_object(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects(bucket, prefix, recursive=True) + for obj in objects: + file_name = obj.object_name + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, bytes_data): + key_name = storage.unique_name(file) + self.client.put_object( + bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes + ) + return key_name + + def download_stream(self, bucket, file): + data = self.client.get_object(bucket, file) + return data.read() + + @staticmethod + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/config/example.json b/config/example.json index 
0690b85e..dc4da9ad 100644 --- a/config/example.json +++ b/config/example.json @@ -1,11 +1,12 @@ { "experiments": { + "deployment": "openwhisk", "update_code": false, "update_storage": false, "download_results": false, "runtime": { "language": "python", - "version": "3.6" + "version": "3.7" }, "type": "invocation-overhead", "perf-cost": { @@ -52,6 +53,40 @@ "region": "europe-west1", "project_name": "", "credentials": "" + }, + "local": { + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + }, + "openwhisk": { + "shutdownStorage": false, + "removeCluster": false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": false, + "docker_registry": { + "registry": "", + "username": "", + "password": "" + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } } } } diff --git a/config/openwhisk.json b/config/openwhisk.json new file mode 100644 index 00000000..c41b4966 --- /dev/null +++ b/config/openwhisk.json @@ -0,0 +1,20 @@ +{ + "experiments": { + "update_code": false, + "update_storage": false, + "download_results": false, + "deployment": "openwhisk", + "runtime": { + "language": "python", + "version": "3.6" + } + }, + "deployment": { + "name": "openwhisk", + "shutdownStorage": false, + "removeCluster": false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": "false" + } +} diff --git a/config/systems.json b/config/systems.json index 7a3bf450..c38f1233 100644 --- a/config/systems.json +++ b/config/systems.json @@ -10,7 +10,8 @@ "languages": { "python": { "base_images": { - "3.6": "python:3.6-slim" + "3.7": "python:3.7-slim", + "3.8": "python:3.8-slim" }, "images": ["run", "build"], "username": "docker_user", @@ -21,7 +22,8 @@ }, "nodejs": { "base_images": { - "13.6": "node:13.6-slim" + "12": "node:12-slim", + "14": "node:14-slim" }, "images": ["run", "build"], "username": "docker_user", @@ -36,13 +38,12 @@ "languages": { "python": { "base_images": { - "3.8": "lambci/lambda:build-python3.8", - "3.7": "lambci/lambda:build-python3.7", - "3.6": "lambci/lambda:build-python3.6" + "3.9": "amazon/aws-lambda-python:3.9", + "3.8": "amazon/aws-lambda-python:3.8", + "3.7": "amazon/aws-lambda-python:3.7" }, - "versions": ["3.6", "3.7", "3.8"], + "versions": ["3.7", "3.8", "3.9"], "images": ["build"], - "username": "docker_user", "deployment": { "files": [ "handler.py", "storage.py"], "packages": [] @@ -50,12 +51,11 @@ }, "nodejs": { "base_images": { - "12.x" : "lambci/lambda:build-nodejs12.x", - "10.x" : "lambci/lambda:build-nodejs10.x" + "14.x" : "amazon/aws-lambda-nodejs:14", + "12.x" : "amazon/aws-lambda-nodejs:12" }, - "versions": ["10.x", "12.x"], + "versions": ["12.x", "14.x"], "images": ["build"], - "username": "docker_user", "deployment": { "files": [ "handler.js", "storage.js"], "packages": { @@ -69,8 +69,9 @@ "languages": { "python": { "base_images": { - "3.7": "mcr.microsoft.com/azure-functions/python:2.0-python3.7", - "3.6": "mcr.microsoft.com/azure-functions/python:2.0-python3.6" + "3.7": "mcr.microsoft.com/azure-functions/python:3.0-python3.7", + "3.8": "mcr.microsoft.com/azure-functions/python:3.0-python3.8", + "3.9": "mcr.microsoft.com/azure-functions/python:3.0-python3.9" }, "images": ["build"], "username": "docker_user", @@ -81,8 +82,8 @@ }, "nodejs": { "base_images": { - "10" : 
"mcr.microsoft.com/azure-functions/node:2.0-node10", - "8" : "mcr.microsoft.com/azure-functions/node:2.0-node8" + "14" : "mcr.microsoft.com/azure-functions/node:3.0-node14", + "12" : "mcr.microsoft.com/azure-functions/node:3.0-node12" }, "images": ["build"], "username": "docker_user", @@ -116,9 +117,9 @@ }, "nodejs": { "base_images": { - "6" : "gcr.io/google-appengine/nodejs", - "8" : "gcr.io/google-appengine/nodejs", - "10" : "gcr.io/google-appengine/nodejs" + "10" : "gcr.io/google-appengine/nodejs", + "12" : "gcr.io/google-appengine/nodejs", + "14" : "gcr.io/google-appengine/nodejs" }, "images": ["build"], "username": "docker_user", @@ -131,5 +132,37 @@ } } } + }, + "openwhisk": { + "languages": { + "python": { + "base_images": { + "3.7": "openwhisk/action-python-v3.7", + "3.9": "openwhisk/action-python-v3.9" + }, + "images": ["function"], + "username": "docker_user", + "deployment": { + "files": [ "__main__.py", "storage.py", "setup.py"], + "packages": { + "minio": "^5.0.10" + } + } + }, + "nodejs": { + "base_images": { + "10" : "openwhisk/action-nodejs-v10", + "12" : "openwhisk/action-nodejs-v12" + }, + "images": ["function"], + "username": "docker_user", + "deployment": { + "files": [ "index.js", "storage.js"], + "packages": { + "minio": "^7.0.16" + } + } + } + } } } diff --git a/docker/Dockerfile.build.aws.nodejs b/docker/Dockerfile.build.aws.nodejs deleted file mode 100755 index 23806417..00000000 --- a/docker/Dockerfile.build.aws.nodejs +++ /dev/null @@ -1,14 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ENV HOME=/home/${USER} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN mkdir -p /mnt/function && chown -R ${USER}:${USER} /mnt/function -USER ${USER}:${USER} - -COPY --chown=${USER}:${USER} docker/nodejs_installer.sh installer.sh - -CMD /bin/bash installer.sh diff --git a/docker/Dockerfile.build.aws.python b/docker/Dockerfile.build.aws.python deleted file mode 100755 index af5ae7a1..00000000 --- a/docker/Dockerfile.build.aws.python +++ /dev/null @@ -1,17 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ARG VERSION -ENV HOME=/home/${USER} -ENV PYTHON_VERSION=${VERSION} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN mkdir -p /mnt/function && chown -R ${USER}:${USER} /mnt/function -USER ${USER}:${USER} - -COPY --chown=${USER}:${USER} docker/python_installer.sh installer.sh - -ENV SCRIPT_FILE=/mnt/function/package.sh -CMD /bin/bash installer.sh diff --git a/docker/Dockerfile.build.azure.nodejs b/docker/Dockerfile.build.azure.nodejs deleted file mode 100755 index b13db48a..00000000 --- a/docker/Dockerfile.build.azure.nodejs +++ /dev/null @@ -1,12 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ENV HOME=/home/${USER} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN chown -R ${USER}:${USER} /home/${USER}/ -USER ${USER}:${USER} - -CMD cd /mnt/function && npm install && rm -rf package-lock.json diff --git a/docker/Dockerfile.build.azure.python b/docker/Dockerfile.build.azure.python deleted file mode 100755 index 59d79059..00000000 --- a/docker/Dockerfile.build.azure.python +++ /dev/null @@ -1,19 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ARG VERSION -ENV HOME=/home/${USER} -ENV PYTHON_VERSION=${VERSION} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN apt-get update\ - && apt-get install -y gcc build-essential python-dev libxml2 libxml2-dev zlib1g-dev\ - && apt-get purge -y --auto-remove -USER ${USER}:${USER} - -ENV SCRIPT_FILE=/mnt/function/package.sh 
-CMD cd /mnt/function\ - && if test -f "requirements.txt.${PYTHON_VERSION}"; then pip3 -q install -r requirements.txt -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages ; else pip3 -q install -r requirements.txt -t .python_packages/lib/site-packages ; fi\ - && if test -f "${SCRIPT_FILE}"; then /bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages ; fi diff --git a/docker/Dockerfile.build.gcp.nodejs b/docker/Dockerfile.build.gcp.nodejs deleted file mode 100755 index 00d811a4..00000000 --- a/docker/Dockerfile.build.gcp.nodejs +++ /dev/null @@ -1,16 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ARG VERSION -ENV HOME=/home/${USER} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} - -RUN install_node --ignore-verification-failure v${VERSION} -RUN chown -R ${USER}:${USER} /home/${USER}/ -USER ${USER}:${USER} - -CMD cd /mnt/function && npm install && rm -rf package-lock.json - diff --git a/docker/Dockerfile.build.gcp.python b/docker/Dockerfile.build.gcp.python deleted file mode 100755 index be471f7d..00000000 --- a/docker/Dockerfile.build.gcp.python +++ /dev/null @@ -1,26 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ARG VERSION -ENV HOME=/home/${USER} -ENV PYTHON_VERSION=${VERSION} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN chmod a+w ${HOME} -USER ${USER}:${USER} - -RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH -RUN echo $PATH -RUN which python - -RUN ls -al $HOME -RUN virtualenv -p python${PYTHON_VERSION} ${HOME}/env -ENV VIRTUAL_ENV ${HOME}/env -ENV PATH ${HOME}/env/bin:${PATH} - -ENV SCRIPT_FILE=/mnt/function/package.sh -CMD cd /mnt/function\ - && if test -f "requirements.txt.${PYTHON_VERSION}"; then pip3 -q install -r requirements.txt -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages ; else pip3 -q install -r requirements.txt -t .python_packages/lib/site-packages ; fi\ - && if test -f "${SCRIPT_FILE}"; then /bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages ; fi diff --git a/docker/Dockerfile.build.local.nodejs b/docker/Dockerfile.build.local.nodejs deleted file mode 100755 index 64ecd75e..00000000 --- a/docker/Dockerfile.build.local.nodejs +++ /dev/null @@ -1,14 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ENV HOME=/home/${USER} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN apt-get update\ - && apt-get install -y zip\ - && apt-get purge -y --auto-remove -USER ${USER}:${USER} - -CMD cd /mnt/function && npm install && rm -rf package-lock.json diff --git a/docker/Dockerfile.build.local.python b/docker/Dockerfile.build.local.python deleted file mode 100755 index 2dd6c0a4..00000000 --- a/docker/Dockerfile.build.local.python +++ /dev/null @@ -1,20 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ARG VERSION -ENV HOME=/home/${USER} -ENV PYTHON_VERSION=${VERSION} - -RUN useradd --non-unique -u $UID ${USER} -WORKDIR ${HOME} -RUN apt-get update\ - && apt-get install -y gcc build-essential python-dev libxml2 libxml2-dev zlib1g-dev\ - && apt-get purge -y --auto-remove -RUN mkdir -p /mnt/function && chown -R ${USER}:${USER} /mnt/function -USER ${USER}:${USER} - -COPY --chown=${USER}:${USER} docker/python_installer.sh installer.sh - -ENV SCRIPT_FILE=/mnt/function/package.sh -CMD /bin/bash installer.sh diff --git a/docker/Dockerfile.run.local.nodejs b/docker/Dockerfile.run.local.nodejs deleted file mode 100755 index f6dc1201..00000000 --- 
a/docker/Dockerfile.run.local.nodejs +++ /dev/null @@ -1,33 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ENV HOME=/home/${USER} - -WORKDIR ${HOME} -RUN deps=''\ - && apt-get update\ - && apt-get install -y curl net-tools python3 sudo ${deps}\ - && apt-get purge -y --auto-remove ${deps} - -RUN useradd --non-unique -u ${UID} -m ${USER}\ - # Set correct permission on home directory - && chown -R ${USER}:${USER} ${HOME}\ - # Enable non-password use of sudo - && echo "$USER ALL=(ALL:ALL) NOPASSWD: ALL" | tee /etc/sudoers.d/dont-prompt-$USER-for-password - -USER ${USER}:${USER} -COPY --chown=${USER}:${USER} docker/local/*.py ${HOME}/ -COPY --chown=${USER}:${USER} docker/local/run.sh . -COPY --chown=${USER}:${USER} docker/local/nodejs/*.js ${HOME}/ -COPY --chown=${USER}:${USER} docker/local/nodejs/timeit.sh . -COPY --chown=${USER}:${USER} docker/local/nodejs/runners.json . -COPY --chown=${USER}:${USER} docker/local/nodejs/package.json . -# must be run as root for some reason -# minio - minio storage SDK -# strftime - format timestamp easily -# csv-writer - export csv -RUN npm install - -# pypapi dependnecy -RUN chmod +x ${HOME}/run.sh diff --git a/docker/Dockerfile.run.local.python b/docker/Dockerfile.run.local.python deleted file mode 100755 index 43b8f735..00000000 --- a/docker/Dockerfile.run.local.python +++ /dev/null @@ -1,36 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -ARG USER -ARG UID -ENV HOME=/home/${USER} - -WORKDIR ${HOME} -# must be run as root for some reason -RUN deps=''\ - && apt-get update\ - # for route and sudo - && apt-get install -y curl net-tools sudo ${deps}\ - && apt-get purge -y --auto-remove ${deps}\ - && pip3 install cffi minio bottle -RUN useradd -u ${UID} -m ${USER}\ - # Let the user use sudo - && usermod -aG sudo ${USER}\ - # Set correct permission on home directory - && chown -R ${USER}:${USER} ${HOME}\ - # Enable non-password use of sudo - && echo "$USER ALL=(ALL:ALL) NOPASSWD: ALL" | tee /etc/sudoers.d/dont-prompt-$USER-for-password -RUN chown -R ${USER}:${USER} ${HOME} - - -USER ${USER}:${USER} -COPY --chown=${USER}:${USER} docker/local/run.sh . -COPY --chown=${USER}:${USER} docker/local/*.py ${HOME}/ -COPY --chown=${USER}:${USER} docker/local/python/*.py ${HOME}/ -COPY --chown=${USER}:${USER} docker/local/python/timeit.sh . -COPY --chown=${USER}:${USER} docker/local/python/runners.json . 
-# https://github.com/moby/moby/issues/35018 :-( -ADD --chown=docker_user:docker_user third-party/pypapi/pypapi ${HOME}/pypapi - -ENV PYTHONPATH=${HOME}/.python_packages/lib/site-packages:$PYTHONPATH - -RUN chmod +x ${HOME}/run.sh diff --git a/docker/aws/nodejs/Dockerfile.build b/docker/aws/nodejs/Dockerfile.build new file mode 100755 index 00000000..63dbb37a --- /dev/null +++ b/docker/aws/nodejs/Dockerfile.build @@ -0,0 +1,20 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +# useradd, groupmod +RUN yum install -y shadow-utils +ENV GOSU_VERSION 1.14 +# https://github.com/tianon/gosu/releases/tag/1.14 +# key https://keys.openpgp.org/search?q=tianon%40debian.org +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu +RUN mkdir -p /sebs/ +COPY docker/nodejs_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/aws/python/Dockerfile.build b/docker/aws/python/Dockerfile.build new file mode 100755 index 00000000..960fc300 --- /dev/null +++ b/docker/aws/python/Dockerfile.build @@ -0,0 +1,22 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +# useradd, groupmod +RUN yum install -y shadow-utils +ENV GOSU_VERSION 1.14 +# https://github.com/tianon/gosu/releases/tag/1.14 +# key https://keys.openpgp.org/search?q=tianon%40debian.org +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu +RUN mkdir -p /sebs/ +COPY docker/python_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/Dockerfile.manage.azure b/docker/azure/Dockerfile.manage similarity index 68% rename from docker/Dockerfile.manage.azure rename to docker/azure/Dockerfile.manage index 8c76a825..f1274949 100644 --- a/docker/Dockerfile.manage.azure +++ b/docker/azure/Dockerfile.manage @@ -1,6 +1,6 @@ FROM python:3.7-slim-stretch -ARG USER -ARG UID +#ARG USER +#ARG UID # disable telemetry by default ENV FUNCTIONS_CORE_TOOLS_TELEMETRY_OPTOUT=1 @@ -20,13 +20,22 @@ RUN apt-get clean && apt-get update\ # https://github.com/moby/moby/issues/20295 # https://github.com/moby/moby/issues/20295 -ENV HOME=/home/${USER} -RUN useradd --non-unique --uid ${UID} -m ${USER}\ - && chown ${USER}:${USER} ${HOME}\ - && chown ${USER}:${USER} /mnt -WORKDIR ${HOME} -USER ${USER}:${USER} +#ENV HOME=/home/${USER} +#RUN useradd --non-unique --uid ${UID} -m ${USER}\ +# && chown ${USER}:${USER} ${HOME}\ +# && chown ${USER}:${USER} /mnt +#WORKDIR ${HOME} +#USER ${USER}:${USER} # Extension must be installed for a specific user, I guess. # Installed with root does not work for user. 
-RUN az extension add --name application-insights +#RUN /usr/bin/az extension add --name application-insights + +RUN apt-get -y --no-install-recommends install gosu +RUN mkdir -p /sebs/ +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +ENV SCRIPT_FILE=/mnt/function/package.sh +#ENV CMD='/usr/bin/ extension add --name application-insights' +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/azure/nodejs/Dockerfile.build b/docker/azure/nodejs/Dockerfile.build new file mode 100755 index 00000000..c19d43e7 --- /dev/null +++ b/docker/azure/nodejs/Dockerfile.build @@ -0,0 +1,15 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN apt-get update && apt-get install -y gosu + +RUN mkdir -p /sebs/ +COPY docker/nodejs_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/azure/python/Dockerfile.build b/docker/azure/python/Dockerfile.build new file mode 100755 index 00000000..810bd358 --- /dev/null +++ b/docker/azure/python/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y gosu gcc build-essential python-dev libxml2 libxml2-dev zlib1g-dev\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY docker/python_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100755 index 00000000..c8e24cd4 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +USER_ID=${CONTAINER_UID} +GROUP_ID=${CONTAINER_GID} +USER=${CONTAINER_USER} + +useradd --non-unique -m -u ${USER_ID} ${USER} +groupmod --non-unique -g ${GROUP_ID} ${USER} +mkdir -p /mnt/function && chown -R ${USER}:${USER} /mnt/function +export HOME=/home/${USER} +echo "Running as ${USER}, with ${USER_ID} and ${GROUP_ID}" + +if [ ! 
-z "$CMD" ]; then + gosu ${USER} $CMD +fi + +exec gosu ${USER} "$@" + diff --git a/docker/gcp/nodejs/Dockerfile.build b/docker/gcp/nodejs/Dockerfile.build new file mode 100755 index 00000000..a09ff331 --- /dev/null +++ b/docker/gcp/nodejs/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV HOME=/home/${USER} + +RUN install_node --ignore-verification-failure v${VERSION} +RUN apt-get update && apt-get install -y gosu + +RUN mkdir -p /sebs/ +COPY docker/nodejs_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/gcp/python/Dockerfile.build b/docker/gcp/python/Dockerfile.build new file mode 100755 index 00000000..62130ebb --- /dev/null +++ b/docker/gcp/python/Dockerfile.build @@ -0,0 +1,23 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y gosu gcc build-essential python-dev libxml2 libxml2-dev zlib1g-dev\ + && apt-get purge -y --auto-remove + +RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH +RUN virtualenv -p python${PYTHON_VERSION} /sebs/env +ENV VIRTUAL_ENV /sebs/env +ENV PATH /sebs/env/bin:${PATH} + +RUN mkdir -p /sebs/ +COPY docker/python_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/local/entrypoint.sh b/docker/local/entrypoint.sh new file mode 100755 index 00000000..5451f551 --- /dev/null +++ b/docker/local/entrypoint.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +USER_ID=${CONTAINER_UID} +GROUP_ID=${CONTAINER_GID} +USER=${CONTAINER_USER} + +useradd --non-unique -m -u ${USER_ID} ${USER} +groupmod --non-unique -g ${GROUP_ID} ${USER} +export HOME=/home/${USER} +echo "Running as ${USER}, with ${USER_ID} and ${GROUP_ID}" + +if [ ! 
-z "$CMD" ]; then + gosu ${USER} $CMD +fi + +chown -R ${USER}:${USER} /sebs/ +echo "$USER ALL=(ALL:ALL) NOPASSWD: ALL" | tee /etc/sudoers.d/dont-prompt-$USER-for-password +usermod -aG sudo ${USER} + +exec gosu ${USER} "$@" + diff --git a/docker/local/nodejs/Dockerfile.build b/docker/local/nodejs/Dockerfile.build new file mode 100755 index 00000000..1ba18a49 --- /dev/null +++ b/docker/local/nodejs/Dockerfile.build @@ -0,0 +1,16 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends zip gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY docker/nodejs_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/local/nodejs/Dockerfile.run b/docker/local/nodejs/Dockerfile.run new file mode 100755 index 00000000..4f2f604c --- /dev/null +++ b/docker/local/nodejs/Dockerfile.run @@ -0,0 +1,27 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + && apt-get install -y --no-install-recommends curl net-tools gosu python3 sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps} + +RUN mkdir -p /sebs +RUN cd /sebs/ && npm install -g uuid strftime express minio +# NODE_PATH=$(npm root --quiet -g) +# https://github.com/moby/moby/issues/29110 +ENV NODE_PATH=/usr/local/lib/node_modules + +COPY docker/local/*.py /sebs/ +COPY docker/local/run.sh /sebs/ +COPY docker/local/nodejs/*.js /sebs/ +COPY docker/local/nodejs/run_server.sh /sebs/ +COPY docker/local/nodejs/timeit.sh /sebs/ +COPY docker/local/nodejs/runners.json /sebs/ +COPY docker/local/nodejs/package.json /sebs/ + +COPY docker/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/local/nodejs/package.json b/docker/local/nodejs/package.json index 219ee514..635c8b69 100644 --- a/docker/local/nodejs/package.json +++ b/docker/local/nodejs/package.json @@ -1,7 +1,5 @@ { "dependencies": { - "csv-writer": "^1.5.0", - "glob": "^7.1.6", "minio": "^7.0.13", "strftime": "^0.10.0", "uuid": "^3.4.0" diff --git a/docker/local/nodejs/run_server.sh b/docker/local/nodejs/run_server.sh new file mode 100755 index 00000000..c257e1fb --- /dev/null +++ b/docker/local/nodejs/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +node /sebs/server.js "$@" diff --git a/docker/local/nodejs/server.js b/docker/local/nodejs/server.js new file mode 100644 index 00000000..b40696d7 --- /dev/null +++ b/docker/local/nodejs/server.js @@ -0,0 +1,42 @@ +const http = require('http'), + strftime = require('strftime'), + express = require('express'), + f = require('/function/function/function'); +//import { v4 as uuidv4 } from 'uuid'; +const { v4: uuidv4 } = require('uuid'); + + +var app = express(); +app.use(express.json()); + +app.post('/', function (req, res) { + + let begin = Date.now(); + let ret = f.handler(req.body); + ret.then((func_res) => { + + let end = Date.now(); + res.setHeader('Content-Type', 'application/json'); + res.end(JSON.stringify({ + begin: strftime('%s.%L', new Date(begin)), + end: strftime('%s.%L', new Date(end)), + request_id: uuidv4(), + is_cold: false, + result: { + output: func_res + } + })); + }, + (reason) => { + console.log('Function invocation failed!'); + console.log(reason); + process.exit(1); + } + ); +}); + 
+app.listen(port=process.argv[2], function () { + console.log(`Server listening on port ${process.argv[2]}.`); +}); + + diff --git a/docker/local/python/Dockerfile.build b/docker/local/python/Dockerfile.build new file mode 100755 index 00000000..874b05a7 --- /dev/null +++ b/docker/local/python/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gcc build-essential python-dev libxml2 libxml2-dev zlib1g-dev gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY docker/python_installer.sh /sebs/installer.sh +COPY docker/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/local/python/Dockerfile.run b/docker/local/python/Dockerfile.run new file mode 100755 index 00000000..84f9852e --- /dev/null +++ b/docker/local/python/Dockerfile.run @@ -0,0 +1,25 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + # for route and sudo + && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps}\ + && pip3 install cffi minio bottle + +RUN mkdir -p /sebs +COPY docker/local/run.sh /sebs/ +COPY docker/local/*.py /sebs/ +COPY docker/local/python/*.py /sebs/ +COPY docker/local/python/run_server.sh /sebs/ +COPY docker/local/python/timeit.sh /sebs/ +COPY docker/local/python/runners.json /sebs/ +ADD third-party/pypapi/pypapi /sebs/pypapi +ENV PYTHONPATH=/sebs/.python_packages/lib/site-packages:$PYTHONPATH + +COPY docker/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/docker/local/python/run_server.sh b/docker/local/python/run_server.sh new file mode 100755 index 00000000..fa9a8229 --- /dev/null +++ b/docker/local/python/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python3 /sebs/server.py "$@" diff --git a/docker/local/python/server.py b/docker/local/python/server.py index 268c2da3..e86327dc 100644 --- a/docker/local/python/server.py +++ b/docker/local/python/server.py @@ -6,7 +6,7 @@ import bottle from bottle import route, run, template, request -CODE_LOCATION='code' +CODE_LOCATION='/function' @route('/', method='POST') def flush_log(): diff --git a/docker/openwhisk/nodejs/Dockerfile.function b/docker/openwhisk/nodejs/Dockerfile.function new file mode 100644 index 00000000..e4f2f375 --- /dev/null +++ b/docker/openwhisk/nodejs/Dockerfile.function @@ -0,0 +1,6 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +COPY . /function/ +RUN cd /function \ + && npm install --no-package-lock --production \ + && npm cache clean --force diff --git a/docker/openwhisk/python/Dockerfile.function b/docker/openwhisk/python/Dockerfile.function new file mode 100644 index 00000000..a6c4225e --- /dev/null +++ b/docker/openwhisk/python/Dockerfile.function @@ -0,0 +1,9 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +COPY . 
function/
+
+RUN touch function/__init__.py \
+    && if test -f "function/requirements.txt.${PYTHON_VERSION}"; then pip install --no-cache-dir -r function/requirements.txt -r function/requirements.txt.${PYTHON_VERSION} function/ ; else pip install --no-cache-dir -r function/requirements.txt function/ ; fi
+
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
new file mode 100644
index 00000000..51b3ba5b
--- /dev/null
+++ b/docs/benchmarks.md
@@ -0,0 +1,23 @@
+
+## Benchmark Applications
+
+
+| Type | Benchmark | Languages | Description |
+| :--- | :---: | :---: | :---: |
+| Webapps | 110.dynamic-html | Python, Node.js | Generate dynamic HTML from a template. |
+| Webapps | 120.uploader | Python, Node.js | Upload a file from a provided URL to cloud storage. |
+| Multimedia | 210.thumbnailer | Python, Node.js | Generate a thumbnail of an image. |
+| Multimedia | 220.video-processing | Python | Add a watermark and generate a GIF from a video file. |
+| Utilities | 311.compression | Python | Create a .zip archive of a group of files in storage and return it to the user for download. |
+| Utilities | 504.dna-visualization | Python | Create visualization data for a DNA sequence. |
+| Inference | 411.image-recognition | Python | Image recognition with ResNet and PyTorch. |
+| Scientific | 501.graph-pagerank | Python | PageRank implementation with igraph. |
+| Scientific | 501.graph-mst | Python | Minimum spanning tree (MST) implementation with igraph. |
+| Scientific | 501.graph-bfs | Python | Breadth-first search (BFS) implementation with igraph. |
+
+For details on benchmark selection and their characterization, please refer to [our paper](https://dl.acm.org/doi/abs/10.1145/3464298.3476133).
+
+## Workflow Applications
+
+**(WiP)** Coming soon!
+
diff --git a/docs/build.md b/docs/build.md
new file mode 100644
index 00000000..c6c293aa
--- /dev/null
+++ b/docs/build.md
@@ -0,0 +1,55 @@
+
+SeBS caches built code packages to save time, as installing dependencies can be time- and bandwidth-consuming, e.g., for ML frameworks such as PyTorch.
+Furthermore, some benchmarks require special treatment - for example, the PyTorch image recognition benchmark requires additional stripping and compression steps to fit into the size limits of an AWS Lambda code package.
+
+By default, we deploy benchmark code as a package uploaded to the serverless platform.
+However, on some platforms we use [Docker images](#docker-image-build) instead.
+
+```mermaid
+sequenceDiagram
+    participant Benchmark Builder
+    participant Cache
+    participant Platform
+    participant Docker Image Builder
+    Benchmark Builder->>Cache: Query for an up-to-date build.
+    Benchmark Builder->>Benchmark Builder: Prepare environment and benchmark code.
+    Benchmark Builder->>Benchmark Builder: Install platform-specific dependencies.
+    Benchmark Builder->>Benchmark Builder: Install benchmark dependencies.
+    Benchmark Builder->>Platform: Package code.
+    Platform-->>Docker Image Builder: Build Image.
+    Platform->>Benchmark Builder: Returns zip file or image tag.
+```
+## Code Package Build
+
+**Query Cache** - first, we check if there is an up-to-date build of the benchmark function
+that can be used.
+
+**Prepare Environment** - benchmark code with data is copied to the build location.
+
+**Add Benchmark Data** - optional step of adding additional, external dependencies. An example is downloading the `ffmpeg` release into the `220.video-processing` benchmark.
+
+**Add Platform-Specific Wrappers** - we add lightweight shims to implement the cloud-specific API and keep benchmark applications generic and portable.
+
+**Add Deployment Packages** - some platforms require installing specific dependencies, such as cloud storage SDKs in Azure and Google Cloud, as well as the Minio SDK for OpenWhisk.
+
+**Install Dependencies** - in this step, we use the Docker builder container.
+We mount the working copy as a volume in the container and execute the dependency installation there.
+This step is skipped for OpenWhisk.
+
+**Package Code** - we move files to create the directory structure expected on each cloud platform and
+create a final deployment package. An example of a customization is Azure Functions, where additional
+JSON configuration files are needed.
+
+**Build Docker Image** - in this step, we create a new image `function.{platform}.{benchmark}.{language}-{version}`.
+The benchmark and all of its dependencies are installed there, and the image can be deployed directly
+to the serverless platform. At the moment, this step is used only in OpenWhisk.
+
+## Docker Image Build
+
+A different approach is taken in OpenWhisk.
+Since OpenWhisk has a very small size limit on code packages, we deploy all functions as Docker images.
+In this step, we copy the prepared benchmark code into a newly created Docker image where
+all dependencies are installed. The image is later pushed to either Docker Hub or a user-defined registry.
+
+In the future, we plan to extend Docker image support to other platforms as well.
+
diff --git a/docs/design.md b/docs/design.md
index 021fe4ec..2a320b2c 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -54,7 +54,7 @@ configuration.
 
 `sebs/experiments/` - implements the SeBS experiments.
 
-`sebs/{aws,azure,gcp}/` - implementation of the FaaS interface for each platform.
+`sebs/{aws,azure,gcp,openwhisk}/` - implementation of the FaaS interface for each platform.
 
 `sebs/local/` - implements the local invocations of functions with Docker containers
 and `minio` storage.
diff --git a/docs/experiments.md b/docs/experiments.md
new file mode 100644
index 00000000..cc5907a5
--- /dev/null
+++ b/docs/experiments.md
@@ -0,0 +1,31 @@
+
+## Experiments
+
+For details on experiments and methodology, please refer to [our paper](#paper).
+
+#### Performance & cost
+
+Invokes a given benchmark a selected number of times, measuring the time and cost of invocations.
+Supports `cold` and `warm` invocations with a selected number of concurrent invocations.
+In addition, to accurately measure the overheads of Azure Function Apps, we offer the `burst` and `sequential` invocation types that do not distinguish
+between cold and warm startups.
+
+#### Network ping-pong
+
+Measures the distribution of network latency between the benchmark driver and the function instance.
+
+#### Invocation overhead
+
+The experiment performs the clock drift synchronization protocol to accurately measure the startup time of a function by comparing
+benchmark driver and function timestamps.
+
+#### Eviction model
+
+**(WiP)** Executes test functions multiple times, with varying size, memory, and runtime configurations, to test how long function instances stay alive.
+The result helps to estimate the analytical models describing cold startups.
+Currently supported only on AWS.
+
+#### Communication Channels
+
+**(WiP)**
+
diff --git a/docs/modularity.md b/docs/modularity.md
index 5994a030..33bda56e 100644
--- a/docs/modularity.md
+++ b/docs/modularity.md
@@ -51,6 +51,13 @@ def handler(event):
 
 Configure dependencies in `requirements.txt` and `package.json`.
 By default, only source code is deployed.
If you need to use additional resources, e.g., HTML template, use script `init.sh` (see an example in `110.dynamic-html`).
+**Important**: By default, SeBS deploys functions as code packages.
+Starting with the addition of OpenWhisk in release 1.1, we also support function
+deployment as Docker images. Docker images with existing benchmarks
+are available on [Docker Hub](https://hub.docker.com/repository/docker/spcleth/serverless-benchmarks).
+When adding a new benchmark, it is possible to use a local Docker registry
+to push images with the new functions - see the [OpenWhisk documentation](platforms.md) for details.
+
 ### How to add a new serverless platform?
 
 First, implement the interfaces in `sebs/faas/*.py` - details can be found in the
diff --git a/docs/overview.png b/docs/overview.png
new file mode 100644
index 00000000..fc47c3e5
Binary files /dev/null and b/docs/overview.png differ
diff --git a/docs/platforms.md b/docs/platforms.md
index 372a770b..3f6b9f28 100644
--- a/docs/platforms.md
+++ b/docs/platforms.md
@@ -1,6 +1,13 @@
 
-SeBS supports three commercial serverless platforms: AWS Lambda, Azure Functions, and Google Cloud
-Functions.
+SeBS supports three commercial serverless platforms: AWS Lambda, Azure Functions, and Google Cloud Functions.
+Furthermore, we support the open-source FaaS system OpenWhisk.
+
+The file `config/example.json` contains all parameters that users can change
+to customize the deployment.
+Some of these parameters, such as cloud credentials or the storage instance address,
+are required.
+In the following subsections, we discuss the mandatory and optional customization
+points for each platform.
 
 ## AWS Lambda
 
@@ -71,3 +78,163 @@ export GCP_PROJECT_NAME = XXXX
 export GCP_SECRET_APPLICATION_CREDENTIALS = XXXX
 ```
 
+## OpenWhisk
+
+SeBS expects users to deploy and configure an OpenWhisk instance.
+In `tools/openwhisk_preparation.py`, we include scripts that help install
+[kind (Kubernetes in Docker)](https://kind.sigs.k8s.io/) and deploy
+OpenWhisk on a `kind` cluster.
+The configuration parameters of OpenWhisk for SeBS can be found
+in `config/example.json` under the key `['deployment']['openwhisk']`.
+In the subsections below, we discuss the meaning and use of each parameter.
+To correctly deploy SeBS functions to OpenWhisk, following the
+subsections on *Toolchain* and *Docker* configuration is particularly important.
+
+### Toolchain
+
+We use OpenWhisk's CLI tool [wsk](https://github.com/apache/openwhisk-cli)
+to manage the deployment of functions to OpenWhisk.
+Please install `wsk` and configure it to point to your OpenWhisk installation.
+By default, SeBS assumes that `wsk` is available in the `PATH`.
+To override this, set the configuration option `wskExec` to the location
+of your `wsk` executable.
+If you are using a local deployment of OpenWhisk with a self-signed
+certificate, you can skip certificate validation with the `wsk` flag `--insecure`.
+To enable this option, set `wskBypassSecurity` to `true`.
+At the moment, all functions are deployed as [*web actions*](https://github.com/apache/openwhisk/blob/master/docs/webactions.md)
+that can be invoked without credentials.
+
+Furthermore, SeBS can be configured to automatically remove the `kind`
+cluster after finishing experiments.
+The boolean option `removeCluster` helps to automate experiments
+that should be conducted on fresh instances of the system.
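+
+For illustration only, the sketch below shows how these two options could translate into a `wsk`
+invocation. The configuration keys are the ones described above; the helper function itself is
+hypothetical and not part of SeBS.
+
+```python
+# Hypothetical helper: build a `wsk` command line from the OpenWhisk options above.
+import shutil
+import subprocess
+
+
+def wsk_command(config: dict, *args: str) -> list:
+    # fall back to `wsk` found in PATH when `wskExec` is not set
+    executable = config.get("wskExec") or shutil.which("wsk") or "wsk"
+    cmd = [executable]
+    # skip certificate validation for local deployments with self-signed certificates
+    if config.get("wskBypassSecurity"):
+        cmd.append("--insecure")
+    return cmd + list(args)
+
+
+# e.g., list the actions deployed on the configured OpenWhisk instance
+subprocess.run(wsk_command({"wskBypassSecurity": True}, "action", "list"))
+```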
+
+### Docker
+
+In FaaS platforms, the function's code can usually be deployed as a code package
+or a Docker image with all dependencies preinstalled.
+However, OpenWhisk has a very low code package size limit of only 48 megabytes.
+To circumvent this limit, we deploy functions using pre-built Docker images.
+
+**Important**: OpenWhisk requires that all Docker images are available
+in the registry, even if they have been cached on a system serving OpenWhisk
+functions.
+When the image is not available, function invocations will fail after a
+timeout with an error message that does not directly indicate image availability issues.
+Therefore, all SeBS benchmark functions are available on Docker Hub.
+
+When adding new functions and extending existing functions with new languages
+and new language versions, Docker images must be placed in the registry.
+However, pushing the image to the default `spcleth/serverless-benchmarks`
+repository on Docker Hub requires permissions.
+To use a different Docker Hub repository, change the key
+`['general']['docker_repository']` in `config/systems.json`.
+
+Alternatively, OpenWhisk users can configure the FaaS platform to use a custom,
+private Docker registry and push new images there.
+A local Docker registry can speed up development when debugging a new function.
+SeBS can use an alternative Docker registry - see the `dockerRegistry` settings
+in the example configuration to configure the registry endpoint and credentials.
+When the `registry` URL is not provided, SeBS will use Docker Hub.
+When `username` and `password` are provided, SeBS will log in to the repository
+and push new images before invoking functions.
+See the documentation on the
+[Docker registry](https://github.com/apache/openwhisk-deploy-kube/blob/master/docs/private-docker-registry.md)
+and [OpenWhisk configuration](https://github.com/apache/openwhisk-deploy-kube/blob/master/docs/private-docker-registry.md)
+for details.
+
+**Warning**: this feature is experimental and has not been tested extensively.
+At the moment, it cannot be used on a `kind` cluster due to issues with
+Docker authorization on invoker nodes. [See the OpenWhisk issue for details](https://github.com/apache/openwhisk-deploy-kube/issues/721).
+
+### Code Deployment
+
+SeBS builds and deploys a new code package when constructing the local cache,
+when the function's contents have changed, and when the user requests a forced rebuild.
+In OpenWhisk, this setup is changed - SeBS will first attempt to verify
+whether the image already exists in the registry and skip building the Docker
+image when possible.
+Then, SeBS can deploy seamlessly to OpenWhisk using the default images
+available on Docker Hub.
+Furthermore, checking for image existence in the registry helps
+avoid failing invocations in OpenWhisk.
+For performance reasons, this check is performed only once, when
+initializing the local cache for the first time.
+
+When the function code is updated,
+SeBS will build the image and push it to the registry.
+Currently, the only available way of checking image existence in
+the registry is pulling the image.
+However, Docker's [experimental `manifest` feature](https://docs.docker.com/engine/reference/commandline/manifest/)
+allows checking image status without downloading its contents, saving bandwidth and time.
+To use that feature in SeBS, set the `experimentalManifest` flag to true.
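+
+The sketch below illustrates the difference between the two checks; it is illustrative Python
+code under the assumptions above, not the actual SeBS implementation.
+
+```python
+# Illustrative check whether an image already exists in a registry.
+# With Docker's experimental manifest feature, no layers are downloaded;
+# otherwise we fall back to pulling the image.
+import subprocess
+
+import docker
+from docker.errors import APIError
+
+
+def image_exists(image: str, experimental_manifest: bool = False) -> bool:
+    if experimental_manifest:
+        # `docker manifest inspect` succeeds only if the image is in the registry
+        ret = subprocess.run(
+            ["docker", "manifest", "inspect", image],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        return ret.returncode == 0
+    try:
+        docker.from_env().images.pull(image)
+        return True
+    except APIError:
+        return False
+```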
+
+### Storage
+
+To provide persistent object storage in OpenWhisk, users must first deploy an instance
+of [`Minio`](https://github.com/minio/minio) storage.
+The storage instance is deployed as a Docker container, and it can be retained
+across many experiments.
+OpenWhisk functions must be able to reach the storage instance.
+Even on a local machine, it's necessary to configure the network address, as OpenWhisk functions
+run isolated from the host network and won't be able to reach other containers running on the Docker bridge.
+
+Use the following command to deploy the storage instance locally and map the host's public port 9011 to the Minio instance.
+
+```bash
+./sebs.py storage start minio --port 9011 --output-json out_storage.json
+```
+
+The output will look similar to the one below.
+As we can see, the storage container is running on the default Docker bridge network with the address `172.17.0.2` and uses port `9000`.
+From the host network, port `9011` is mapped to the container's port `9000` to allow external parties - such as OpenWhisk functions - to reach the storage.
+
+```
+{
+  "address": "172.17.0.2:9000",
+  "mapped_port": 9011,
+  "access_key": "XXX",
+  "secret_key": "XXX",
+  "instance_id": "XXX",
+  "input_buckets": [],
+  "output_buckets": [],
+  "type": "minio"
+}
+```
+
+The storage configuration found in `out_storage.json` needs to be provided to SeBS,
+and the instance address must be updated so that it does not use the internal address.
+In this case, the host machine's address is `172.22.20.30`.
+Note that other parties must use the host network port `9011` to reach the Minio instance.
+Docker's port mapping will take care of the rest.
+
+```
+jq --argfile file1 out_storage.json '.deployment.openwhisk.storage = $file1 | .deployment.openwhisk.storage.address = "172.22.20.30:9011"' config/example.json > config/openwhisk.json
+```
+
+Not sure which address is correct? Use `curl` to verify whether the Minio instance can be reached:
+
+```
+curl -i 172.22.20.30:9011/minio/health/live
+HTTP/1.1 200 OK
+Accept-Ranges: bytes
+Content-Length: 0
+Content-Security-Policy: block-all-mixed-content
+Server: MinIO
+Strict-Transport-Security: max-age=31536000; includeSubDomains
+Vary: Origin
+X-Amz-Request-Id: 16F3D9B9FDFFA340
+X-Content-Type-Options: nosniff
+X-Xss-Protection: 1; mode=block
+Date: Mon, 30 May 2022 10:01:21 GMT
+```
+
+The `shutdownStorage` switch controls the behavior of SeBS.
+When set to true, SeBS will remove the Minio instance after finishing all
+work.
+Otherwise, the container will be retained, and future experiments with SeBS
+will automatically detect an existing Minio instance.
+Reusing the Minio instance helps run experiments faster and more smoothly, since
+SeBS does not have to re-upload the functions' data for each experiment.
+
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 00000000..89c573e4
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,156 @@
+
+SeBS has three basic commands: `benchmark`, `experiment`, and `local`.
+For each command, you can pass the `--verbose` flag to increase the verbosity of the output.
+By default, all scripts will create a cache in the directory `cache` to store code with
+dependencies and information on allocated cloud resources.
+Benchmarks will be rebuilt after a change in source code is detected.
+To enforce redeployment of code and benchmark input, please use the flags `--update-code`
+and `--update-storage`, respectively.
+
+**Note:** the cache does not support updating the cloud region.
If you want to deploy benchmarks
+to a new cloud region, then use a new cache directory.
+
+### Benchmark
+
+This command is used to build, deploy, and execute a serverless benchmark in the cloud.
+The example below invokes the benchmark `110.dynamic-html` on AWS via the standard HTTP trigger.
+
+```
+./sebs.py benchmark invoke 110.dynamic-html test --config config/example.json --deployment aws --verbose
+```
+
+To configure your benchmark, change settings in the config file or use command-line options.
+The full list is available by running `./sebs.py benchmark invoke --help`.
+
+### Regression
+
+Additionally, we provide a regression option to execute all benchmarks on a given platform.
+The example below demonstrates how to run the regression suite with the `test` input size on AWS.
+
+```
+./sebs.py benchmark regression test --config config/example.json --deployment aws
+```
+
+The regression can be executed on a single benchmark as well:
+
+```
+./sebs.py benchmark regression test --config config/example.json --deployment aws --benchmark-name 120.uploader
+```
+
+### Experiment
+
+This command is used to execute benchmarks described in the paper. The example below runs the experiment **perf-cost**:
+
+```
+./sebs.py experiment invoke perf-cost --config config/example.json --deployment aws
+```
+
+The configuration specifies that the benchmark **110.dynamic-html** is executed 50 times, with 50 concurrent invocations, and that both cold and warm invocations are recorded.
+
+```json
+"perf-cost": {
+    "benchmark": "110.dynamic-html",
+    "experiments": ["cold", "warm"],
+    "input-size": "test",
+    "repetitions": 50,
+    "concurrent-invocations": 50,
+    "memory-sizes": [128, 256]
+}
+```
+
+To download cloud metrics and process the invocations into a .csv file with data, run the `process` subcommand:
+
+```
+./sebs.py experiment process perf-cost --config config/example.json --deployment aws
+```
+
+### Local
+
+In addition to the cloud deployment, we provide the option to launch benchmarks locally with the help of [minio](https://min.io/) storage.
+This allows for debugging and a local characterization of the benchmarks.
+
+First, launch a storage instance. The command below is going to deploy a Docker container,
+map the container's port to port `9011` on the host network, and write the storage instance configuration
+to the file `out_storage.json`:
+
+```
+./sebs.py storage start minio --port 9011 --output-json out_storage.json
+```
+
+Then, we need to update the configuration of the `local` deployment with information on the storage
+instance. The `.deployment.local` object in the configuration JSON needs to contain a new object
+`storage` with the data provided in the `out_storage.json` file.
Fortunately, we can achieve this
+automatically with a single command by using `jq`:
+
+```
+jq --argfile file1 out_storage.json '.deployment.local.storage = $file1 ' config/example.json > config/local_deployment.json
+```
+
+The output file will contain a JSON object that should look similar to this one:
+
+```json
+"deployment": {
+  "name": "local",
+  "local": {
+    "storage": {
+      "address": "172.17.0.2:9000",
+      "mapped_port": 9011,
+      "access_key": "XXXXX",
+      "secret_key": "XXXXX",
+      "instance_id": "XXXXX",
+      "input_buckets": [],
+      "output_buckets": [],
+      "type": "minio"
+    }
+  }
+}
+```
+
+To launch Docker containers, use the following command - this example launches the benchmark `110.dynamic-html` with the input size `test`:
+
+```
+./sebs.py local start 110.dynamic-html test out_benchmark.json --config config/local_deployment.json --deployments 1
+```
+
+The output file `out_benchmark.json` will contain information on the deployed containers and the endpoints that can be used to invoke functions:
+
+```
+{
+  "functions": [
+    {
+      "benchmark": "110.dynamic-html",
+      "hash": "5ff0657337d17b0cf6156f712f697610",
+      "instance_id": "e4797ae01c52ac54bfc22aece1e413130806165eea58c544b2a15c740ec7d75f",
+      "name": "110.dynamic-html-python-128",
+      "port": 9000,
+      "triggers": [],
+      "url": "172.17.0.3:9000"
+    }
+  ],
+  "inputs": [
+    {
+      "random_len": 10,
+      "username": "testname"
+    }
+  ],
+  "storage": {
+    ...
+  }
+}
+```
+
+In our example, we can use `curl` to invoke the function with the provided input:
+
+```
+curl 172.17.0.3:9000 --request POST --data '{"random_len": 10,"username": "testname"}' --header 'Content-Type: application/json'
+```
+
+To stop the containers, you can use the following commands:
+
+```
+./sebs.py local stop out_benchmark.json
+./sebs.py storage stop out_storage.json
+```
+
+The stopped containers won't be automatically removed unless the option `--remove-containers` has been passed to the `start` command.
+
diff --git a/install.py b/install.py
index 3405be4e..0be56542 100755
--- a/install.py
+++ b/install.py
@@ -7,15 +7,13 @@
 parser = argparse.ArgumentParser(description="Install SeBS and dependencies.")
 parser.add_argument('--venv', metavar='DIR', type=str, default="python-venv", help='destination of local Python virtual environment')
 parser.add_argument('--python-path', metavar='DIR', type=str, default="python3", help='Path to local Python installation.')
-for deployment in ["aws", "azure", "gcp"]:
+for deployment in ["aws", "azure", "gcp", "openwhisk"]:
     parser.add_argument(f"--{deployment}", action="store_const", const=True, dest=deployment)
     parser.add_argument(f"--no-{deployment}", action="store_const", const=False, default=True, dest=deployment)
 for deployment in ["local"]:
     parser.add_argument(f"--{deployment}", action="store_const", default=True, const=True, dest=deployment)
     parser.add_argument(f"--no-{deployment}", action="store_const", const=False, dest=deployment)
 parser.add_argument("--with-pypapi", action="store_true")
-parser.add_argument("--force-rebuild-docker-images", default=False, action="store_true")
-parser.add_argument("--dont-rebuild-docker-images", default=False, action="store_true")
 args = parser.parse_args()
 
 def execute(cmd):
@@ -43,12 +41,6 @@ def execute(cmd):
 if args.aws:
     print("Install Python dependencies for AWS")
     execute(". 
{}/bin/activate && pip3 install -r requirements.aws.txt".format(env_dir)) - if args.force_rebuild_docker_images or (os.getuid() != 1000 and not args.dont_rebuild_docker_images): - print(f"AWS: rebuild Docker images for current user ID: {os.getuid()}") - execute(". {}/bin/activate && tools/build_docker_images.py --deployment aws".format(env_dir)) - elif os.getuid() != 1000 and args.dont_rebuild_docker_images: - print(f"AWS: Docker images are built for user with UID 1000, current UID: {os.getuid()}." - "Skipping rebuild as requested by user, but recommending to rebuild the images") flag = "TRUE" if args.aws else "FALSE" execute(f'echo "export SEBS_WITH_AWS={flag}" >> {env_dir}/bin/activate') execute(f'echo "unset SEBS_WITH_AWS" >> {env_dir}/bin/deactivate') @@ -56,12 +48,6 @@ def execute(cmd): if args.azure: print("Install Python dependencies for Azure") execute(". {}/bin/activate && pip3 install -r requirements.azure.txt".format(env_dir)) - if args.force_rebuild_docker_images or (os.getuid() != 1000 and not args.dont_rebuild_docker_images): - print(f"Azure: rebuild Docker images for current user ID: {os.getuid()}") - execute(". {}/bin/activate && tools/build_docker_images.py --deployment azure".format(env_dir)) - elif os.getuid() != 1000 and args.dont_rebuild_docker_images: - print(f"Azure: Docker images are built for user with UID 1000, current UID: {os.getuid()}." - "Skipping rebuild as requested by user, but recommending to rebuild the images") flag = "TRUE" if args.azure else "FALSE" execute(f'echo "export SEBS_WITH_AZURE={flag}" >> {env_dir}/bin/activate') execute(f'echo "unset SEBS_WITH_AZURE" >> {env_dir}/bin/deactivate') @@ -69,22 +55,19 @@ def execute(cmd): if args.gcp: print("Install Python dependencies for GCP") execute(". {}/bin/activate && pip3 install -r requirements.gcp.txt".format(env_dir)) - if args.force_rebuild_docker_images or (os.getuid() != 1000 and not args.dont_rebuild_docker_images): - print(f"GCP: rebuild Docker images for current user ID: {os.getuid()}") - execute(". {}/bin/activate && tools/build_docker_images.py --deployment gcp".format(env_dir)) - elif os.getuid() != 1000 and args.dont_rebuild_docker_images: - print(f"GCP: Docker images are built for user with UID 1000, current UID: {os.getuid()}." - "Skipping rebuild as requested by user, but recommending to rebuild the images") flag = "TRUE" if args.gcp else "FALSE" execute(f'echo "export SEBS_WITH_GCP={flag}" >> {env_dir}/bin/activate') execute(f'echo "unset SEBS_WITH_GCP" >> {env_dir}/bin/deactivate') +flag = "TRUE" if args.openwhisk else "FALSE" +execute(f'echo "export SEBS_WITH_OPENWHISK={flag}" >> {env_dir}/bin/activate') +execute(f'echo "unset SEBS_WITH_OPENWHISK" >> {env_dir}/bin/deactivate') + if args.local: print("Install Python dependencies for local") execute(". 
{}/bin/activate && pip3 install -r requirements.local.txt".format(env_dir)) - if not args.dont_rebuild_docker_images: - print("Initialize Docker image for local storage.") - execute("docker pull minio/minio:latest") + print("Initialize Docker image for local storage.") + execute("docker pull minio/minio:latest") print("Initialize git submodules") execute("git submodule update --init --recursive") diff --git a/requirements.azure.txt b/requirements.azure.txt index f439c6a4..f7d82499 100644 --- a/requirements.azure.txt +++ b/requirements.azure.txt @@ -1 +1 @@ -azure-storage-blob==12.1.0 +azure-storage-blob==12.10.0 diff --git a/requirements.txt b/requirements.txt index 4b0f8e72..1b2e17d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ scipy # pycurl>=7.43 click>=7.1.2 + diff --git a/sebs.py b/sebs.py index 11329508..ce78036c 100755 --- a/sebs.py +++ b/sebs.py @@ -5,7 +5,6 @@ import logging import functools import os -import sys import traceback from typing import cast, Optional @@ -13,8 +12,9 @@ import sebs from sebs import SeBS +from sebs.types import Storage as StorageTypes from sebs.regression import regression_suite -from sebs.utils import update_nested_dict +from sebs.utils import update_nested_dict, catch_interrupt from sebs.faas import System as FaaSSystem from sebs.faas.function import Trigger @@ -39,6 +39,7 @@ def __call__(self, *args, **kwargs): if sebs_client is not None: sebs_client.shutdown() + def simplified_common_params(func): @click.option( "--config", @@ -46,12 +47,8 @@ def simplified_common_params(func): type=click.Path(readable=True), help="Location of experiment config.", ) - @click.option( - "--output-dir", default=os.path.curdir, help="Output directory for results." - ) - @click.option( - "--output-file", default="out.log", help="Output filename for logging." 
- ) + @click.option("--output-dir", default=os.path.curdir, help="Output directory for results.") + @click.option("--output-file", default="out.log", help="Output filename for logging.") @click.option( "--cache", default=os.path.join(os.path.curdir, "cache"), @@ -69,15 +66,14 @@ def simplified_common_params(func): type=click.Choice(["python", "nodejs"]), help="Benchmark language", ) - @click.option( - "--language-version", default=None, type=str, help="Benchmark language version" - ) + @click.option("--language-version", default=None, type=str, help="Benchmark language version") @functools.wraps(func) def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper + def common_params(func): @click.option( "--update-code/--no-update-code", @@ -92,7 +88,7 @@ def common_params(func): @click.option( "--deployment", default=None, - type=click.Choice(["azure", "aws", "gcp", "local"]), + type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk"]), help="Cloud deployment to use.", ) @simplified_common_params @@ -116,8 +112,9 @@ def parse_common_params( language, language_version, initialize_deployment: bool = True, - ignore_cache: bool = False + ignore_cache: bool = False, ): + global sebs_client, deployment_client config_obj = json.load(open(config, "r")) os.makedirs(output_dir, exist_ok=True) @@ -129,13 +126,10 @@ def parse_common_params( # CLI overrides JSON options update_nested_dict(config_obj, ["experiments", "runtime", "language"], language) - update_nested_dict( - config_obj, ["experiments", "runtime", "version"], language_version - ) + update_nested_dict(config_obj, ["experiments", "runtime", "version"], language_version) update_nested_dict(config_obj, ["deployment", "name"], deployment) update_nested_dict(config_obj, ["experiments", "update_code"], update_code) update_nested_dict(config_obj, ["experiments", "update_storage"], update_storage) - update_nested_dict(config_obj, ["experiments", "benchmark"], benchmark) if initialize_deployment: deployment_client = sebs_client.get_deployment( @@ -148,6 +142,8 @@ def parse_common_params( if ignore_cache: sebs_client.ignore_cache() + catch_interrupt() + return config_obj, output_dir, logging_filename, sebs_client, deployment_client @@ -160,19 +156,30 @@ def cli(): def benchmark(): pass + @benchmark.command() @click.argument("benchmark", type=str) # , help="Benchmark to be used.") @click.argument( "benchmark-input-size", type=click.Choice(["test", "small", "large"]) ) # help="Input test size") -@click.option( - "--repetitions", default=5, type=int, help="Number of experimental repetitions." -) +@click.option("--repetitions", default=5, type=int, help="Number of experimental repetitions.") @click.option( "--trigger", type=click.Choice(["library", "http"]), default="http", - help="Function trigger to be used." 
+ help="Function trigger to be used.", +) +@click.option( + "--memory", + default=None, + type=int, + help="Override default memory settings for the benchmark function.", +) +@click.option( + "--timeout", + default=None, + type=int, + help="Override default timeout settings for the benchmark function.", ) @click.option( "--function-name", @@ -180,8 +187,24 @@ def benchmark(): type=str, help="Override function name for random generation.", ) +@click.option( + "--image-tag-prefix", + default=None, + type=str, + help="Attach prefix to generated Docker image tag.", +) @common_params -def invoke(benchmark, benchmark_input_size, repetitions, trigger, function_name, **kwargs): +def invoke( + benchmark, + benchmark_input_size, + repetitions, + trigger, + memory, + timeout, + function_name, + image_tag_prefix, + **kwargs, +): ( config, @@ -190,34 +213,36 @@ def invoke(benchmark, benchmark_input_size, repetitions, trigger, function_name, sebs_client, deployment_client, ) = parse_common_params(**kwargs) + if image_tag_prefix is not None: + sebs_client.config.image_tag_prefix = image_tag_prefix + experiment_config = sebs_client.get_experiment_config(config["experiments"]) + update_nested_dict(config, ["experiments", "benchmark"], benchmark) benchmark_obj = sebs_client.get_benchmark( benchmark, deployment_client, experiment_config, logging_filename=logging_filename, ) + if memory is not None: + benchmark_obj.benchmark_config.memory = memory + if timeout is not None: + benchmark_obj.benchmark_config.timeout = timeout + func = deployment_client.get_function( - benchmark_obj, function_name if function_name else deployment_client.default_function_name(benchmark_obj) - ) - storage = deployment_client.get_storage( - replace_existing=experiment_config.update_storage - ) - input_config = benchmark_obj.prepare_input( - storage=storage, size=benchmark_input_size + benchmark_obj, + function_name if function_name else deployment_client.default_function_name(benchmark_obj), ) + storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) + input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) - result = sebs.experiments.ExperimentResult( - experiment_config, deployment_client.config - ) + result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) result.begin() trigger_type = Trigger.TriggerType.get(trigger) triggers = func.triggers(trigger_type) if len(triggers) == 0: - trigger = deployment_client.create_trigger( - func, trigger_type - ) + trigger = deployment_client.create_trigger(func, trigger_type) else: trigger = triggers[0] for i in range(repetitions): @@ -225,9 +250,9 @@ def invoke(benchmark, benchmark_input_size, repetitions, trigger, function_name, ret = trigger.sync_invoke(input_config) if ret.stats.failure: sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - #deployment_client.get_invocation_error( + # deployment_client.get_invocation_error( # function_name=func.name, start_time=start_time, end_time=end_time - #) + # ) result.add_invocation(func, ret) result.end() with open("experiments.json", "w") as out_f: @@ -252,17 +277,18 @@ def process(**kwargs): experiments = sebs.experiments.ExperimentResult.deserialize( config, sebs_client.cache_client, - sebs_client.logging_handlers(logging_filename), + sebs_client.generate_logging_handlers(logging_filename), ) for func in experiments.functions(): deployment_client.download_metrics( - func, *experiments.times(), experiments.invocations(func) + func, 
*experiments.times(), experiments.invocations(func), experiments.metrics(func) ) with open("results.json", "w") as out_f: out_f.write(sebs.utils.serialize(experiments)) sebs_client.logging.info("Save results to {}".format(os.path.abspath("results.json"))) + @benchmark.command() @click.argument( "benchmark-input-size", type=click.Choice(["test", "small", "large"]) @@ -280,63 +306,90 @@ def process(**kwargs): help="Location of experiments cache.", ) @click.option( - "--output-dir", default=os.path.join(os.path.curdir, "regression-output"), help="Output directory for results." + "--output-dir", + default=os.path.join(os.path.curdir, "regression-output"), + help="Output directory for results.", ) def regression(benchmark_input_size, benchmark_name, **kwargs): # for regression, deployment client is initialized locally # disable default initialization - ( - config, - output_dir, - logging_filename, - sebs_client, - _ - ) = parse_common_params( - initialize_deployment=False, - **kwargs + (config, output_dir, logging_filename, sebs_client, _) = parse_common_params( + initialize_deployment=False, **kwargs ) - succ = regression_suite( + regression_suite( sebs_client, config["experiments"], - set( (config['deployment']['name'],) ), + set((config["deployment"]["name"],)), config["deployment"], - benchmark_name + benchmark_name, ) + +@cli.group() +def storage(): + pass + + +@storage.command("start") +@click.argument("storage", type=click.Choice([StorageTypes.MINIO])) +@click.option("--output-json", type=click.Path(dir_okay=False, writable=True), default=None) +@click.option("--port", type=int, default=9000) +def storage_start(storage, output_json, port): + + import docker + + sebs.utils.global_logging() + storage_type = sebs.SeBS.get_storage_implementation(StorageTypes(storage)) + storage_instance = storage_type(docker.from_env(), None, True) + logging.info(f"Starting storage {str(storage)} on port {port}.") + storage_instance.start(port) + if output_json: + logging.info(f"Writing storage configuration to {output_json}.") + with open(output_json, "w") as f: + json.dump(storage_instance.serialize(), fp=f, indent=2) + else: + logging.info("Writing storage configuration to stdout.") + logging.info(json.dumps(storage_instance.serialize(), indent=2)) + + +@storage.command("stop") +@click.argument("input-json", type=click.Path(exists=True, dir_okay=False, readable=True)) +def storage_stop(input_json): + + sebs.utils.global_logging() + with open(input_json, "r") as f: + cfg = json.load(f) + storage_type = cfg["type"] + storage_cfg = sebs.SeBS.get_storage_config_implementation(storage_type).deserialize(cfg) + logging.info(f"Stopping storage deployment of {storage_type}.") + storage = sebs.SeBS.get_storage_implementation(storage_type).deserialize(storage_cfg, None) + storage.stop() + logging.info(f"Stopped storage deployment of {storage_type}.") + + @cli.group() def local(): pass + @local.command() @click.argument("benchmark", type=str) -@click.argument( - "benchmark-input-size", type=click.Choice(["test", "small", "large"]) -) +@click.argument("benchmark-input-size", type=click.Choice(["test", "small", "large"])) @click.argument("output", type=str) +@click.option("--deployments", default=1, type=int, help="Number of deployed containers.") @click.option( - "--deployments", default=1, type=int, help="Number of deployed containers." -) -@click.option( - "--remove-containers/--no-remove-containers", default=True, help="Remove containers after stopping." 
+ "--remove-containers/--no-remove-containers", + default=True, + help="Remove containers after stopping.", ) @simplified_common_params def start(benchmark, benchmark_input_size, output, deployments, remove_containers, **kwargs): """ - Start a given number of function instances and a storage instance. + Start a given number of function instances and a storage instance. """ - ( - config, - output_dir, - logging_filename, - sebs_client, - deployment_client - ) = parse_common_params( - ignore_cache = True, - update_code = False, - update_storage = False, - deployment = "local", - **kwargs + (config, output_dir, logging_filename, sebs_client, deployment_client) = parse_common_params( + ignore_cache=True, update_code=False, update_storage=False, deployment="local", **kwargs ) deployment_client = cast(sebs.local.Local, deployment_client) deployment_client.remove_containers = remove_containers @@ -349,13 +402,9 @@ def start(benchmark, benchmark_input_size, output, deployments, remove_container experiment_config, logging_filename=logging_filename, ) - storage = deployment_client.get_storage( - replace_existing=experiment_config.update_storage - ) + storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) result.set_storage(storage) - input_config = benchmark_obj.prepare_input( - storage=storage, size=benchmark_input_size - ) + input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) result.add_input(input_config) for i in range(deployments): func = deployment_client.get_function( @@ -369,12 +418,13 @@ def start(benchmark, benchmark_input_size, output, deployments, remove_container result.serialize(output) sebs_client.logging.info(f"Save results to {os.path.abspath(output)}") + @local.command() @click.argument("input-json", type=str) -#@simplified_common_params +# @simplified_common_params def stop(input_json, **kwargs): """ - Stop function and storage containers. + Stop function and storage containers. 
""" sebs.utils.global_logging() @@ -384,6 +434,7 @@ def stop(input_json, **kwargs): deployment.shutdown() logging.info(f"Stopped deployment from {os.path.abspath(input_json)}") + @cli.group() def experiment(): pass @@ -418,9 +469,10 @@ def experment_process(experiment, extend_time_interval, **kwargs): deployment_client, ) = parse_common_params(**kwargs) experiment = sebs_client.get_experiment(experiment, config["experiments"]) - experiment.process(sebs_client, deployment_client, output_dir, logging_filename, extend_time_interval) + experiment.process( + sebs_client, deployment_client, output_dir, logging_filename, extend_time_interval + ) if __name__ == "__main__": cli() - diff --git a/sebs/__init__.py b/sebs/__init__.py index 6eceb356..b92b9f25 100644 --- a/sebs/__init__.py +++ b/sebs/__init__.py @@ -1,7 +1,9 @@ -from .sebs import SeBS # noqa +""" + SeBS +""" -# from .aws import * # noqa -# from .azure import * # noqa +from .version import __version__ # noqa +from .sebs import SeBS # noqa from .cache import Cache # noqa from .benchmark import Benchmark # noqa diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 5519bd85..6c34af90 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -16,7 +16,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers -from sebs.faas.function import Function, ExecutionResult, Trigger +from sebs.faas.function import Function, ExecutionResult, Trigger, FunctionConfig from sebs.faas.storage import PersistentStorage from sebs.faas.system import System @@ -122,7 +122,14 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: benchmark: benchmark name """ - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], @@ -161,6 +168,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun code_bucket: Optional[str] = None func_name = AWS.format_function_name(func_name) storage_client = self.get_storage() + function_cfg = FunctionConfig.from_benchmark(code_package) # we can either check for exception or use list_functions # there's no API for test @@ -175,10 +183,9 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun code_package.benchmark, ret["Configuration"]["FunctionArn"], code_package.hash, - timeout, - memory, language_runtime, self.config.resources.lambda_role(self.session), + function_cfg, ) self.update_function(lambda_function, code_package) lambda_function.updated_code = True @@ -210,20 +217,20 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun Timeout=timeout, Code=code_config, ) - # url = self.create_http_trigger(func_name, None, None) - # print(url) + lambda_function = LambdaFunction( func_name, code_package.benchmark, ret["FunctionArn"], code_package.hash, - timeout, - memory, language_runtime, self.config.resources.lambda_role(self.session), + function_cfg, code_bucket, ) + self.wait_function_active(lambda_function) + # Add LibraryTrigger to a new function from sebs.aws.triggers import LibraryTrigger @@ -274,24 +281,32 @@ def update_function(self, function: Function, code_package: Benchmark): self.client.update_function_code( FunctionName=name, S3Bucket=bucket, S3Key=code_package_name ) - self.logging.info( - f"Updated 
code of {name} function. " - "Sleep 5 seconds before updating configuration to avoid cloud errors." - ) - time.sleep(5) + self.wait_function_updated(function) + self.logging.info(f"Updated code of {name} function. ") # and update config self.client.update_function_configuration( - FunctionName=name, Timeout=function.timeout, MemorySize=function.memory + FunctionName=name, Timeout=function.config.timeout, MemorySize=function.config.memory ) + self.wait_function_updated(function) + self.logging.info(f"Updated configuration of {name} function. ") + self.wait_function_updated(function) self.logging.info("Published new function code") + def update_function_configuration(self, function: Function, benchmark: Benchmark): + function = cast(LambdaFunction, function) + self.client.update_function_configuration( + FunctionName=function.name, + Timeout=function.config.timeout, + MemorySize=function.config.memory, + ) + self.wait_function_updated(function) + self.logging.info(f"Updated configuration of {function.name} function. ") + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name func_name = "{}-{}-{}".format( - code_package.benchmark, - code_package.language_name, - code_package.benchmark_config.memory, + code_package.benchmark, code_package.language_name, code_package.language_version ) return AWS.format_function_name(func_name) @@ -471,6 +486,11 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T SourceArn=f"{http_api.arn}/*/*", ) trigger = HTTPTrigger(http_api.endpoint, api_name) + self.logging.info( + f"Created HTTP trigger for {func.name} function. " + "Sleep 5 seconds to avoid cloud errors." + ) + time.sleep(5) trigger.logging_handlers = self.logging_handlers elif trigger_type == Trigger.TriggerType.LIBRARY: # should already exist @@ -486,8 +506,8 @@ def _enforce_cold_start(self, function: Function): func = cast(LambdaFunction, function) self.get_lambda_client().update_function_configuration( FunctionName=func.name, - Timeout=func.timeout, - MemorySize=func.memory, + Timeout=func.config.timeout, + MemorySize=func.config.memory, Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, ) @@ -495,6 +515,22 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func) - import time + self.logging.info("Sent function updates enforcing cold starts.") + for func in functions: + lambda_function = cast(LambdaFunction, func) + self.wait_function_updated(lambda_function) + self.logging.info("Finished function updates enforcing cold starts.") + + def wait_function_active(self, func: LambdaFunction): + + self.logging.info("Waiting for Lambda function to be created...") + waiter = self.client.get_waiter("function_active_v2") + waiter.wait(FunctionName=func.name) + self.logging.info("Lambda function has been created.") + + def wait_function_updated(self, func: LambdaFunction): - time.sleep(5) + self.logging.info("Waiting for Lambda function to be updated...") + waiter = self.client.get_waiter("function_updated_v2") + waiter.wait(FunctionName=func.name) + self.logging.info("Lambda function has been updated.") diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 36b52c27..a36dc821 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -1,7 +1,7 @@ from typing import cast, Optional from sebs.aws.s3 import S3 -from sebs.faas.function import Function +from sebs.faas.function import Function, 
FunctionConfig class LambdaFunction(Function): @@ -11,18 +11,15 @@ def __init__( benchmark: str, arn: str, code_package_hash: str, - timeout: int, - memory: int, runtime: str, role: str, + cfg: FunctionConfig, bucket: Optional[str] = None, ): - super().__init__(benchmark, name, code_package_hash) + super().__init__(benchmark, name, code_package_hash, cfg) self.arn = arn - self.timeout = timeout - self.memory = memory - self.runtime = runtime self.role = role + self.runtime = runtime self.bucket = bucket @staticmethod @@ -33,8 +30,6 @@ def serialize(self) -> dict: return { **super().serialize(), "arn": self.arn, - "timeout": self.timeout, - "memory": self.memory, "runtime": self.runtime, "role": self.role, "bucket": self.bucket, @@ -45,15 +40,15 @@ def deserialize(cached_config: dict) -> "LambdaFunction": from sebs.faas.function import Trigger from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + cfg = FunctionConfig.deserialize(cached_config["config"]) ret = LambdaFunction( cached_config["name"], cached_config["benchmark"], cached_config["arn"], cached_config["hash"], - cached_config["timeout"], - cached_config["memory"], cached_config["runtime"], cached_config["role"], + cfg, cached_config["bucket"], ) for trigger in cached_config["triggers"]: diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index e47bd77f..765cace3 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -103,6 +103,13 @@ def download(self, bucket_name: str, key: str, filepath: str): self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) + def exists_bucket(self, bucket_name: str) -> bool: + try: + self.client.head_bucket(Bucket=bucket_name) + return True + except self.client.exceptions.ClientError: + return False + def list_bucket(self, bucket_name: str): objects_list = self.client.list_objects_v2(Bucket=bucket_name) objects: List[str] diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index a12289e4..e957d693 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -17,7 +17,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers, execute -from ..faas.function import Function, ExecutionResult +from ..faas.function import Function, FunctionConfig, ExecutionResult from ..faas.storage import PersistentStorage from ..faas.system import System @@ -114,7 +114,14 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: # - function.json # host.json # requirements.txt/package.json - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure @@ -237,6 +244,10 @@ def update_function(self, function: Function, code_package: Benchmark): trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) + def update_function_configuration(self, function: Function, code_package: Benchmark): + # FIXME: this does nothing currently - we don't specify timeout + self.logging.warn("Updating function's memory and timeout configuration is not supported.") + def _mount_function_code(self, code_package: Benchmark): self.cli_instance.upload_package(code_package.code_location, "/mnt/function/") @@ -245,9 +256,10 @@ def 
default_function_name(self, code_package: Benchmark) -> str: Functionapp names must be globally unique in Azure. """ func_name = ( - "{}-{}-{}".format( + "{}-{}-{}-{}".format( code_package.benchmark, code_package.language_name, + code_package.language_version, self.config.resources_id, ) .replace(".", "-") @@ -261,6 +273,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct language_runtime = code_package.language_version resource_group = self.config.resources.resource_group(self.cli_instance) region = self.config.region + function_cfg = FunctionConfig.from_benchmark(code_package) config = { "resource_group": resource_group, @@ -320,6 +333,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct benchmark=code_package.benchmark, code_hash=code_package.hash, function_storage=function_storage_account, + cfg=function_cfg, ) # update existing function app @@ -362,6 +376,8 @@ def download_metrics( metrics: Dict[str, dict], ): + self.cli_instance.install_insights() + resource_group = self.config.resources.resource_group(self.cli_instance) # Avoid warnings in the next step ret = self.cli_instance.execute( diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index cad108a8..96558ff6 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -18,7 +18,7 @@ def deployment_name(): def __init__(self, region: str, cache_client: Cache, conn_string: str, replace_existing: bool): super().__init__(region, cache_client, replace_existing) - self.client = BlobServiceClient.from_connection_string(conn_string) + self.client: BlobServiceClient = BlobServiceClient.from_connection_string(conn_string) """ Internal implementation of creating a new container. @@ -83,7 +83,10 @@ def upload(self, container_name: str, filepath: str, key: str): self.logging.info("Upload {} to {}".format(filepath, container_name)) client = self.client.get_blob_client(container_name, key) with open(filepath, "rb") as upload_file: - client.upload_blob(upload_file.read()) + client.upload_blob(upload_file) # type: ignore + + def exists_bucket(self, container: str) -> bool: + return self.client.get_container_client(container).exists() """ Return list of files in a container. diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index f98226e4..97fbe706 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -25,14 +25,27 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): self.docker_instance = docker_client.containers.run( image=repo_name + ":" + image_name, command="/bin/bash", - user="1000:1000", - volumes={}, - # remove=True, + environment={ + "CONTAINER_UID": str(os.getuid()), + "CONTAINER_GID": str(os.getgid()), + "CONTAINER_USER": "docker_user", + }, + remove=True, stdout=True, stderr=True, detach=True, tty=True, ) + self._insights_installed = False + logging.info("Started Azure CLI container.") + while True: + try: + dkg = self.docker_instance.logs(stream=True, follow=True) + next(dkg).decode("utf-8") + break + except StopIteration: + pass + logging.info("Starting Azure manage Docker instance") """ @@ -74,6 +87,10 @@ def upload_package(self, directory: str, dest: str): self.execute("mkdir -p {}".format(dest)) self.docker_instance.put_archive(path=dest, data=handle.read()) + def install_insights(self): + if not self._insights_installed: + self.execute("az extension add --name application-insights") + """ Shutdowns Docker instance. 
""" diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 20591595..6bb5ee51 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -297,7 +297,7 @@ def initialize(cfg: Config, dct: dict): else: config._resources_id = str(uuid.uuid1())[0:8] config.logging.info( - f"Azure: generating unique resource name for" + f"Azure: generating unique resource name for " f"the experiments: {config._resources_id}" ) diff --git a/sebs/azure/function.py b/sebs/azure/function.py index ade7e980..61ef4c57 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,5 +1,5 @@ from sebs.azure.config import AzureResources -from sebs.faas.function import Function +from sebs.faas.function import Function, FunctionConfig class AzureFunction(Function): @@ -9,8 +9,9 @@ def __init__( benchmark: str, code_hash: str, function_storage: AzureResources.Storage, + cfg: FunctionConfig, ): - super().__init__(benchmark, name, code_hash) + super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage def serialize(self) -> dict: @@ -21,11 +22,13 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], cached_config["benchmark"], cached_config["hash"], AzureResources.Storage.deserialize(cached_config["function_storage"]), + cfg, ) from sebs.azure.triggers import HTTPTrigger diff --git a/sebs/benchmark.py b/sebs/benchmark.py index a631f2d8..0c51b2cd 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from sebs.experiments.config import Config as ExperimentConfig - from sebs.experiments.config import Language + from sebs.faas.function import Language class BenchmarkConfig: @@ -29,10 +29,18 @@ def __init__(self, timeout: int, memory: int, languages: List["Language"]): def timeout(self) -> int: return self._timeout + @timeout.setter + def timeout(self, val: int): + self._timeout = val + @property def memory(self) -> int: return self._memory + @memory.setter + def memory(self, val: int): + self._memory = val + @property def languages(self) -> List["Language"]: return self._languages @@ -40,7 +48,7 @@ def languages(self) -> List["Language"]: # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(json_object: dict) -> "BenchmarkConfig": - from sebs.experiments.config import Language + from sebs.faas.function import Language return BenchmarkConfig( json_object["timeout"], @@ -170,7 +178,9 @@ def __init__( self._docker_client = docker_client self._system_config = system_config self._hash_value = None - self._output_dir = os.path.join(output_dir, f"{benchmark}_code") + self._output_dir = os.path.join( + output_dir, f"{benchmark}_code", self._language.value, self._language_version + ) # verify existence of function in cache self.query_cache() @@ -215,6 +225,7 @@ def query_cache(self): deployment=self._deployment_name, benchmark=self._benchmark, language=self.language_name, + language_version=self.language_version, ) self._functions = self._cache_client.get_functions( deployment=self._deployment_name, @@ -297,7 +308,7 @@ def add_deployment_package_nodejs(self, output_dir): json.dump(package_json, package_file, indent=2) def add_deployment_package(self, output_dir): - from sebs.experiments.config import Language + from sebs.faas.function import Language if self.language == Language.PYTHON: self.add_deployment_package_python(output_dir) @@ -321,8 +332,8 @@ def install_dependencies(self, 
output_dir): ): self.logging.info( ( - "Docker build image for {deployment} run in {language} " - "is not available, skipping" + "There is no Docker build image for {deployment} run in {language}, " + "thus skipping the Docker-based installation of dependencies." ).format(deployment=self._deployment_name, language=self.language_name) ) else: @@ -378,9 +389,12 @@ def install_dependencies(self, output_dir): stdout = self._docker_client.containers.run( "{}:{}".format(repo_name, image_name), volumes=volumes, - environment={"APP": self.benchmark}, - # user="1000:1000", - user=uid, + environment={ + "CONTAINER_UID": str(os.getuid()), + "CONTAINER_GID": str(os.getgid()), + "CONTAINER_USER": "docker_user", + "APP": self.benchmark, + }, remove=True, stdout=True, stderr=True, @@ -413,7 +427,10 @@ def install_dependencies(self, output_dir): container.put_archive("/mnt/function", data.read()) # do the build step exit_code, stdout = container.exec_run( - cmd="/bin/bash installer.sh", stdout=True, stderr=True + cmd="/bin/bash /sebs/installer.sh", + user="docker_user", + stdout=True, + stderr=True, ) # copy updated code with package data, stat = container.get_archive("/mnt/function") @@ -447,7 +464,7 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str], Tuple[str, int]] + self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild @@ -477,7 +494,11 @@ def build( self.add_deployment_package(self._output_dir) self.install_dependencies(self._output_dir) self._code_location, self._code_size = deployment_build_step( - os.path.abspath(self._output_dir), self.language_name, self.benchmark + os.path.abspath(self._output_dir), + self.language_name, + self.language_version, + self.benchmark, + self.is_cached, ) self.logging.info( ( diff --git a/sebs/cache.py b/sebs/cache.py index dcce8ff7..ed5096e6 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -7,7 +7,7 @@ import threading from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING # noqa -from sebs.utils import LoggingBase +from sebs.utils import LoggingBase, serialize if TYPE_CHECKING: from sebs.benchmark import Benchmark @@ -59,7 +59,7 @@ def typename() -> str: def load_config(self): with self._lock: - for cloud in ["azure", "aws", "gcp"]: + for cloud in ["azure", "aws", "gcp", "openwhisk"]: cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) if os.path.exists(cloud_config_file): self.cached_config[cloud] = json.load(open(cloud_config_file, "r")) @@ -86,7 +86,7 @@ def unlock(self): def shutdown(self): if self.config_updated: - for cloud in ["azure", "aws", "gcp"]: + for cloud in ["azure", "aws", "gcp", "openwhisk"]: if cloud in self.cached_config: cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) self.logging.info("Update cached config {}".format(cloud_config_file)) @@ -121,11 +121,11 @@ def get_benchmark_config(self, deployment: str, benchmark: str): """ def get_code_package( - self, deployment: str, benchmark: str, language: str + self, deployment: str, benchmark: str, language: str, language_version: str ) -> Optional[Dict[str, Any]]: cfg = self.get_benchmark_config(deployment, benchmark) - if cfg and language in cfg: - return cfg[language]["code_package"] + if cfg and language in cfg and language_version in cfg[language]["code_package"]: + return cfg[language]["code_package"][language_version] else: return None @@ 
-165,10 +165,11 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): def add_code_package(self, deployment_name: str, language_name: str, code_package: "Benchmark"): with self._lock: language = code_package.language_name + language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) os.makedirs(benchmark_dir, exist_ok=True) # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language) + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if not os.path.exists(cached_dir): os.makedirs(cached_dir, exist_ok=True) @@ -181,29 +182,43 @@ def add_code_package(self, deployment_name: str, language_name: str, code_packag package_name = os.path.basename(code_package.code_location) cached_location = os.path.join(cached_dir, package_name) shutil.copy2(code_package.code_location, cached_dir) - language_config: Dict[str, Any] = { - "code_package": code_package.serialize(), - "functions": {}, - } + language_config = code_package.serialize() # don't store absolute path to avoid problems with moving cache dir relative_cached_loc = os.path.relpath(cached_location, self.cache_dir) - language_config["code_package"]["location"] = relative_cached_loc + language_config["location"] = relative_cached_loc date = str(datetime.datetime.now()) - language_config["code_package"]["date"] = { + language_config["date"] = { "created": date, "modified": date, } - config = {deployment_name: {language: language_config}} + # config = {deployment_name: {language: language_config}} + config = { + deployment_name: { + language: { + "code_package": {language_version: language_config}, + "functions": {}, + } + } + } + # make sure to not replace other entries if os.path.exists(os.path.join(benchmark_dir, "config.json")): with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: cached_config = json.load(fp) if deployment_name in cached_config: - cached_config[deployment_name][language] = language_config + # language known, platform known, extend dictionary + if language in cached_config[deployment_name]: + cached_config[deployment_name][language]["code_package"][ + language_version + ] = language_config + # language unknown, platform known - add new dictionary + else: + cached_config[deployment_name][language] = config[deployment_name][ + language + ] else: - cached_config[deployment_name] = { - language: language_config, - } + # language unknown, platform unknown - add new dictionary + cached_config[deployment_name] = config[deployment_name] config = cached_config with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(config, fp, indent=2) @@ -220,9 +235,10 @@ def update_code_package( ): with self._lock: language = code_package.language_name + language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language) + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if os.path.exists(cached_dir): # copy code @@ -242,8 +258,12 @@ def update_code_package( with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: config = json.load(fp) date = str(datetime.datetime.now()) - config[deployment_name][language]["code_package"]["date"]["modified"] = date - config[deployment_name][language]["code_package"]["hash"] = code_package.hash + 
config[deployment_name][language]["code_package"][language_version]["date"][ + "modified" + ] = date + config[deployment_name][language]["code_package"][language_version][ + "hash" + ] = code_package.hash with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(config, fp, indent=2) else: @@ -287,7 +307,7 @@ def add_function( ) config = cached_config with open(cache_config, "w") as fp: - json.dump(config, fp, indent=2) + fp.write(serialize(config)) else: raise RuntimeError( "Can't cache function {} for a non-existing code package!".format(function.name) @@ -314,7 +334,7 @@ def update_function(self, function: "Function"): name ] = function.serialize() with open(cache_config, "w") as fp: - json.dump(cached_config, fp, indent=2) + fp.write(serialize(cached_config)) else: raise RuntimeError( "Can't cache function {} for a non-existing code package!".format(function.name) diff --git a/sebs/config.py b/sebs/config.py index fd7f66aa..cfafbf00 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -1,5 +1,5 @@ import json -from typing import Dict, List +from typing import Dict, List, Optional from sebs.utils import project_absolute_path @@ -8,6 +8,15 @@ class SeBSConfig: def __init__(self): with open(project_absolute_path("config", "systems.json"), "r") as cfg: self._system_config = json.load(cfg) + self._image_tag_prefix = "" + + @property + def image_tag_prefix(self) -> str: + return self._image_tag_prefix + + @image_tag_prefix.setter + def image_tag_prefix(self, tag: str): + self._image_tag_prefix = tag def docker_repository(self) -> str: return self._system_config["general"]["docker_repository"] @@ -30,5 +39,32 @@ def supported_language_versions(self, deployment_name: str, language_name: str) "base_images" ].keys() + def benchmark_base_images(self, deployment_name: str, language_name: str) -> Dict[str, str]: + return self._system_config[deployment_name]["languages"][language_name]["base_images"] + + def benchmark_image_name( + self, + system: str, + benchmark: str, + language_name: str, + language_version: str, + registry: Optional[str] = None, + ) -> str: + + tag = self.benchmark_image_tag(system, benchmark, language_name, language_version) + repo_name = self.docker_repository() + if registry is not None: + return f"{registry}/{repo_name}:{tag}" + else: + return f"{repo_name}:{tag}" + + def benchmark_image_tag( + self, system: str, benchmark: str, language_name: str, language_version: str + ) -> str: + tag = f"function.{system}.{benchmark}.{language_name}-{language_version}" + if self.image_tag_prefix: + tag = f"{tag}-{self.image_tag_prefix}" + return tag + def username(self, deployment_name: str, language_name: str) -> str: return self._system_config[deployment_name]["languages"][language_name]["username"] diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index 8f6bb7bf..a5ca3f0b 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -1,48 +1,6 @@ -from enum import Enum from typing import Dict - -class Language(Enum): - PYTHON = "python" - NODEJS = "nodejs" - - # FIXME: 3.7+ python with future annotations - @staticmethod - def deserialize(val: str) -> "Language": - for member in Language: - if member.value == val: - return member - raise Exception("Unknown language type {}".format(member)) - - -class Runtime: - - _language: Language - _version: str - - @property - def language(self) -> Language: - return self._language - - @property - def version(self) -> str: - return self._version - - @version.setter - def version(self, val: str): - 
self._version = val - - def serialize(self) -> dict: - return {"language": self._language.value, "version": self._version} - - # FIXME: 3.7+ python with future annotations - @staticmethod - def deserialize(config: dict) -> "Runtime": - cfg = Runtime() - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} - cfg._language = languages[config["language"]] - cfg._version = config["version"] - return cfg +from sebs.faas.function import Runtime class Config: @@ -52,7 +10,7 @@ def __init__(self): self._download_results: bool = False self._flags: Dict[str, bool] = {} self._experiment_configs: Dict[str, dict] = {} - self._runtime = Runtime() + self._runtime = Runtime(None, None) @property def update_code(self) -> bool: diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 3fc81482..36cde660 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -87,7 +87,7 @@ def run(self): def compute_statistics(self, times: List[float]): mean, median, std, cv = basic_stats(times) - self.logging.info(f"Mean {mean}, median {median}, std {std}, CV {cv}") + self.logging.info(f"Mean {mean} [ms], median {median} [ms], std {std}, CV {cv}") for alpha in [0.95, 0.99]: ci_interval = ci_tstudents(alpha, times) interval_width = ci_interval[1] - ci_interval[0] @@ -173,15 +173,11 @@ def _run_configuration( ret = res.get() if first_iteration: continue - if (run_type == PerfCost.RunType.COLD and not ret.stats.cold_start) or ( - run_type == PerfCost.RunType.WARM and ret.stats.cold_start - ): - self.logging.info( - f"Invocation {ret.request_id} " - f"cold: {ret.stats.cold_start} " - f"on experiment {run_type.str()}!" - ) + if run_type == PerfCost.RunType.COLD and not ret.stats.cold_start: + self.logging.info(f"Invocation {ret.request_id} is not cold!") incorrect.append(ret) + elif run_type == PerfCost.RunType.WARM and ret.stats.cold_start: + self.logging.info(f"Invocation {ret.request_id} is cold!") else: result.add_invocation(self._function, ret) colds_count += ret.stats.cold_start @@ -190,14 +186,15 @@ def _run_configuration( except Exception as e: error_count += 1 error_executions.append(str(e)) - self.logging.info( - f"Processed {samples_gathered} samples out of {repetitions}," - f"{error_count} errors" - ) samples_generated += invocations if first_iteration: self.logging.info( - f"Processed {samples_gathered} warm-up samples, ignore results." + f"Processed {samples_gathered} warm-up samples, ignoring these results." 
+ ) + else: + self.logging.info( + f"Processed {samples_gathered} samples out of {repetitions}," + f" {error_count} errors" ) first_iteration = False diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index 1a56684c..b28de75c 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -44,10 +44,16 @@ def add_result_bucket(self, result_bucket: str): self.result_bucket = result_bucket def add_invocation(self, func: Function, invocation: ExecutionResult): + # the function has most likely failed, thus no request id + if invocation.request_id: + req_id = invocation.request_id + else: + req_id = f"failed-{len(self._invocations.get(func.name, []))}" + if func.name in self._invocations: - self._invocations.get(func.name)[invocation.request_id] = invocation # type: ignore + self._invocations.get(func.name)[req_id] = invocation # type: ignore else: - self._invocations[func.name] = {invocation.request_id: invocation} + self._invocations[func.name] = {req_id: invocation} def functions(self) -> List[str]: return list(self._invocations.keys()) diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 55730e88..294e7b49 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -118,6 +118,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi from sebs.gcp.config import GCPConfig implementations["gcp"] = GCPConfig.deserialize + if has_platform("openwhisk"): + from sebs.openwhisk.config import OpenWhiskConfig + + implementations["openwhisk"] = OpenWhiskConfig.deserialize func = implementations.get(name) assert func, "Unknown config type!" return func(config[name] if name in config else config, cache, handlers) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 56688779..5b1bf748 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -1,11 +1,15 @@ +from __future__ import annotations + import json +import concurrent.futures from abc import ABC from abc import abstractmethod -import concurrent.futures +from dataclasses import dataclass from datetime import datetime, timedelta from enum import Enum -from typing import Callable, Dict, List, Optional # noqa +from typing import Callable, Dict, List, Optional, Type, TypeVar # noqa +from sebs.benchmark import Benchmark from sebs.utils import LoggingBase """ @@ -180,7 +184,7 @@ def get(name: str) -> "Trigger.TriggerType": return member raise Exception("Unknown trigger type {}".format(member)) - def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: + def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> ExecutionResult: import pycurl from io import BytesIO @@ -188,6 +192,9 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: c.setopt(pycurl.HTTPHEADER, ["Content-Type: application/json"]) c.setopt(pycurl.POST, 1) c.setopt(pycurl.URL, url) + if not verify_ssl: + c.setopt(pycurl.SSL_VERIFYHOST, 0) + c.setopt(pycurl.SSL_VERIFYPEER, 0) data = BytesIO() c.setopt(pycurl.WRITEFUNCTION, data.write) @@ -211,6 +218,9 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: result = ExecutionResult.from_times(begin, end) result.times.http_startup = conn_time result.times.http_first_byte_return = receive_time + # OpenWhisk will not return id on a failure + if "request_id" not in output: + raise RuntimeError(f"Cannot process allocation with output: {output}") result.request_id = output["request_id"] # General benchmark output parsing result.parse_benchmark_output(output) @@ -244,6 +254,85 @@ def 
deserialize(cached_config: dict) -> "Trigger": pass +class Language(Enum): + PYTHON = "python" + NODEJS = "nodejs" + + # FIXME: 3.7+ python with future annotations + @staticmethod + def deserialize(val: str) -> Language: + for member in Language: + if member.value == val: + return member + raise Exception(f"Unknown language type {member}") + + +class Architecture(Enum): + X86 = "x86" + ARM = "arm" + + def serialize(self) -> str: + return self.value + + @staticmethod + def deserialize(val: str) -> Architecture: + for member in Architecture: + if member.value == val: + return member + raise Exception(f"Unknown architecture type {member}") + + +@dataclass +class Runtime: + + language: Language + version: str + + def serialize(self) -> dict: + return {"language": self.language.value, "version": self.version} + + @staticmethod + def deserialize(config: dict) -> Runtime: + languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} + return Runtime(language=languages[config["language"]], version=config["version"]) + + +T = TypeVar("T", bound="FunctionConfig") + + +@dataclass +class FunctionConfig: + timeout: int + memory: int + runtime: Runtime + architecture: Architecture = Architecture.X86 + + @staticmethod + def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: + runtime = Runtime(language=benchmark.language, version=benchmark.language_version) + cfg = obj_type( + timeout=benchmark.benchmark_config.timeout, + memory=benchmark.benchmark_config.memory, + runtime=runtime, + ) + # FIXME: configure architecture + return cfg + + @staticmethod + def from_benchmark(benchmark: Benchmark) -> FunctionConfig: + return FunctionConfig._from_benchmark(benchmark, FunctionConfig) + + @staticmethod + def deserialize(data: dict) -> FunctionConfig: + keys = list(FunctionConfig.__dataclass_fields__.keys()) + data = {k: v for k, v in data.items() if k in keys} + data["runtime"] = Runtime.deserialize(data["runtime"]) + return FunctionConfig(**data) + + def serialize(self) -> dict: + return self.__dict__ + + """ Abstraction base class for FaaS function. Contains a list of associated triggers and might implement non-trigger execution if supported by the SDK. 
@@ -252,13 +341,18 @@ def deserialize(cached_config: dict) -> "Trigger": class Function(LoggingBase): - def __init__(self, benchmark: str, name: str, code_hash: str): + def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): super().__init__() self._benchmark = benchmark self._name = name self._code_package_hash = code_hash self._updated_code = False self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} + self._cfg = cfg + + @property + def config(self) -> FunctionConfig: + return self._cfg @property def name(self): @@ -304,6 +398,7 @@ def serialize(self) -> dict: "name": self._name, "hash": self._code_package_hash, "benchmark": self._benchmark, + "config": self.config.serialize(), "triggers": [ obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers ], diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index e54812e2..d3781f2e 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -152,6 +152,10 @@ def list_bucket(self, bucket_name: str) -> List[str]: def list_buckets(self, bucket_name: str) -> List[str]: pass + @abstractmethod + def exists_bucket(self, bucket_name: str) -> bool: + pass + @abstractmethod def clean_bucket(self, bucket_name: str): pass @@ -170,16 +174,33 @@ def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): # Load cached information cached_buckets = self.cache_client.get_storage_config(self.deployment_name(), benchmark) if cached_buckets: - self.input_buckets = cached_buckets["buckets"]["input"] - for bucket in self.input_buckets: - self.input_buckets_files.append(self.list_bucket(bucket)) - self.output_buckets = cached_buckets["buckets"]["output"] - # for bucket in self.output_buckets: - # self.clean_bucket(bucket) - self.cached = True - self.logging.info("Using cached storage input buckets {}".format(self.input_buckets)) - self.logging.info("Using cached storage output buckets {}".format(self.output_buckets)) - return + cache_valid = True + for bucket in [ + *cached_buckets["buckets"]["input"], + *cached_buckets["buckets"]["output"], + ]: + if not self.exists_bucket(bucket): + cache_valid = False + self.logging.info(f"Cached storage buckets {bucket} does not exist.") + break + + if cache_valid: + self.input_buckets = cached_buckets["buckets"]["input"] + for bucket in self.input_buckets: + self.input_buckets_files.append(self.list_bucket(bucket)) + self.output_buckets = cached_buckets["buckets"]["output"] + # for bucket in self.output_buckets: + # self.clean_bucket(bucket) + self.cached = True + self.logging.info( + "Using cached storage input buckets {}".format(self.input_buckets) + ) + self.logging.info( + "Using cached storage output buckets {}".format(self.output_buckets) + ) + return + else: + self.logging.info("Cached storage buckets are no longer valid, creating new ones.") buckets = self.list_buckets(self.correct_name(benchmark)) for i in range(0, requested_buckets[0]): diff --git a/sebs/faas/system.py b/sebs/faas/system.py index cdc3a656..64923255 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -104,7 +104,14 @@ def get_storage(self, replace_existing: bool) -> PersistentStorage: """ @abstractmethod - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: pass @abstractmethod @@ -184,12 +191,18 @@ def get_function(self, code_package: Benchmark, func_name: 
Optional[str] = None) ) # is the function up-to-date? if function.code_package_hash != code_package.hash or rebuilt: - self.logging.info( - f"Cached function {func_name} with hash " - f"{function.code_package_hash} is not up to date with " - f"current build {code_package.hash} in " - f"{code_location}, updating cloud version!" - ) + if function.code_package_hash != code_package.hash: + self.logging.info( + f"Cached function {func_name} with hash " + f"{function.code_package_hash} is not up to date with " + f"current build {code_package.hash} in " + f"{code_location}, updating cloud version!" + ) + if rebuilt: + self.logging.info( + f"Enforcing rebuild and update of cached function " + f"{func_name} with hash {function.code_package_hash}." + ) self.update_function(function, code_package) function.code_package_hash = code_package.hash function.updated_code = True @@ -200,8 +213,54 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) function=function, ) code_package.query_cache() + # code up to date, but configuration needs to be updated + # FIXME: detect change in function config + elif self.is_configuration_changed(function, code_package): + self.update_function_configuration(function, code_package) + self.cache_client.update_function(function) + code_package.query_cache() + else: + self.logging.info(f"Cached function {func_name} is up to date.") return function + @abstractmethod + def update_function_configuration(self, cached_function: Function, benchmark: Benchmark): + pass + + """ + This function checks for common function parameters to verify if their value is + still up to date. + """ + + def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: + + changed = False + for attr in ["timeout", "memory"]: + new_val = getattr(benchmark.benchmark_config, attr) + old_val = getattr(cached_function.config, attr) + if new_val != old_val: + self.logging.info( + f"Updating function configuration due to changed attribute {attr}: " + f"cached function has value {old_val} whereas {new_val} has been requested." + ) + changed = True + setattr(cached_function.config, attr, new_val) + + for lang_attr in [["language"] * 2, ["language_version", "version"]]: + new_val = getattr(benchmark, lang_attr[0]) + old_val = getattr(cached_function.config.runtime, lang_attr[1]) + if new_val != old_val: + # FIXME: should this even happen? we should never pick the function with + # different runtime - that should be encoded in the name + self.logging.info( + f"Updating function configuration due to changed runtime attribute {lang_attr[0]}: " + f"cached function has value {old_val} whereas {new_val} has been requested."
+ ) + changed = True + setattr(cached_function.config.runtime, lang_attr[1], new_val) + + return changed + @abstractmethod def default_function_name(self, code_package: Benchmark) -> str: pass diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 80d32096..d9c55a03 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -1,6 +1,6 @@ from typing import cast, Optional -from sebs.faas.function import Function +from sebs.faas.function import Function, FunctionConfig from sebs.gcp.storage import GCPStorage @@ -10,13 +10,10 @@ def __init__( name: str, benchmark: str, code_package_hash: str, - timeout: int, - memory: int, + cfg: FunctionConfig, bucket: Optional[str] = None, ): - super().__init__(benchmark, name, code_package_hash) - self.timeout = timeout - self.memory = memory + super().__init__(benchmark, name, code_package_hash, cfg) self.bucket = bucket @staticmethod @@ -26,8 +23,6 @@ def typename() -> str: def serialize(self) -> dict: return { **super().serialize(), - "timeout": self.timeout, - "memory": self.memory, "bucket": self.bucket, } @@ -36,12 +31,12 @@ def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.function import Trigger from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + cfg = FunctionConfig.deserialize(cached_config["config"]) ret = GCPFunction( cached_config["name"], cached_config["benchmark"], cached_config["hash"], - cached_config["timeout"], - cached_config["memory"], + cfg, cached_config["bucket"], ) for trigger in cached_config["triggers"]: diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 230a8339..cd97ab9e 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -16,7 +16,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.benchmark import Benchmark -from ..faas.function import Function, Trigger +from ..faas.function import Function, FunctionConfig, Trigger from .storage import PersistentStorage from ..faas.system import System from sebs.gcp.config import GCPConfig @@ -103,9 +103,7 @@ def get_storage( def default_function_name(code_package: Benchmark) -> str: # Create function name func_name = "{}-{}-{}".format( - code_package.benchmark, - code_package.language_name, - code_package.benchmark_config.memory, + code_package.benchmark, code_package.language_name, code_package.language_version ) return GCP.format_function_name(func_name) @@ -131,7 +129,14 @@ def format_function_name(func_name: str) -> str: :return: path to packaged code and its size """ - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", ".python_packages"], @@ -194,6 +199,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti storage_client = self.get_storage() location = self.config.region project_name = self.config.project_name + function_cfg = FunctionConfig.from_benchmark(code_package) code_package_name = cast(str, os.path.basename(package)) code_bucket, idx = storage_client.add_input_bucket(benchmark) @@ -247,7 +253,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti self.logging.info(f"Function {func_name} accepts now unauthenticated invocations!") function = GCPFunction( - func_name, benchmark, code_package.hash, timeout, memory, code_bucket + func_name, benchmark, code_package.hash, function_cfg, code_bucket ) else: # if result 
is not empty, then function does exists @@ -257,8 +263,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti name=func_name, benchmark=benchmark, code_package_hash=code_package.hash, - timeout=timeout, - memory=memory, + cfg=function_cfg, bucket=code_bucket, ) self.update_function(function, code_package) @@ -336,8 +341,8 @@ def update_function(self, function: Function, code_package: Benchmark): "name": full_func_name, "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": function.memory, - "timeout": str(function.timeout) + "s", + "availableMemoryMb": function.config.memory, + "timeout": str(function.config.timeout) + "s", "httpsTrigger": {}, "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, }, @@ -352,6 +357,33 @@ def update_function(self, function: Function, code_package: Benchmark): break self.logging.info("Published new function code and configuration.") + def update_function_configuration(self, function: Function, benchmark: Benchmark): + function = cast(GCPFunction, function) + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name + ) + req = ( + self.function_client.projects() + .locations() + .functions() + .patch( + name=full_func_name, + updateMask="availableMemoryMb,timeout", + body={ + "availableMemoryMb": function.config.memory, + "timeout": str(function.config.timeout) + "s", + }, + ) + ) + res = req.execute() + versionId = res["metadata"]["versionId"] + while True: + if not self.is_deployed(function.name, versionId): + time.sleep(5) + else: + break + self.logging.info("Published new function configuration.") + @staticmethod def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 8202cd0e..b59b18e0 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -3,6 +3,7 @@ from typing import List from google.cloud import storage as gcp_storage +from google.api_core import exceptions from sebs.cache import Cache from ..faas.storage import PersistentStorage @@ -64,6 +65,13 @@ def upload(self, bucket_name: str, filepath: str, key: str): gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # workaround for connection timeout blob.upload_from_filename(filepath) + def exists_bucket(self, bucket_name: str) -> bool: + try: + return self.client.bucket(bucket_name).exists() + # 403 returned when the bucket exists but is owned by another user + except exceptions.Forbidden: + return False + def list_bucket(self, bucket_name: str) -> List[str]: bucket_instance = self.client.get_bucket(bucket_name) all_blobs = list(self.client.list_blobs(bucket_instance)) @@ -78,21 +86,10 @@ def list_buckets(self, bucket_name: str) -> List[str]: def clean_bucket(self, bucket: str): raise NotImplementedError() - """ - :param bucket_name: - :return: list of files in a given bucket - """ - - # def list_bucket(self, bucket_name: str) -> List[str]: - # name = "{}-{}".format(bucket_name, suffix) - # bucket_name = self.create_bucket(name) - # return bucket_name - def uploader_func(self, bucket_idx: int, key: str, filepath: str) -> None: if self.cached and not self.replace_existing: return bucket_name = self.input_buckets[bucket_idx] - print(self.input_buckets_files[bucket_idx]) if not self.replace_existing: for blob in self.input_buckets_files[bucket_idx]: if key == blob: diff --git 
a/sebs/local/config.py b/sebs/local/config.py index 3c5e18ec..5b091664 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -1,5 +1,8 @@ +from typing import cast, Optional + from sebs.cache import Cache from sebs.faas.config import Config, Credentials, Resources +from sebs.storage.minio import MinioConfig from sebs.utils import LoggingHandlers @@ -12,13 +15,32 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return LocalCredentials() +""" + No need to cache and store - we prepare the benchmark and finish. + The rest is used later by the user. +""" + + class LocalResources(Resources): + def __init__(self, storage_cfg: Optional[MinioConfig] = None): + super().__init__() + self._storage = storage_cfg + + @property + def storage_config(self) -> Optional[MinioConfig]: + return self._storage + def serialize(self) -> dict: return {} @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: - return LocalResources() + ret = LocalResources() + # Check for new config + if "storage" in config: + ret._storage = MinioConfig.deserialize(config["storage"]) + ret.logging.info("Using user-provided configuration of storage for local containers.") + return ret class LocalConfig(Config): @@ -43,10 +65,17 @@ def credentials(self) -> LocalCredentials: def resources(self) -> LocalResources: return self._resources + @resources.setter + def resources(self, val: LocalResources): + self._resources = val + @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: config_obj = LocalConfig() + config_obj.resources = cast( + LocalResources, LocalResources.deserialize(config, cache, handlers) + ) config_obj.logging_handlers = handlers return config_obj diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index d3f0e4b7..d23d87d4 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -3,7 +3,7 @@ from sebs.cache import Cache from sebs.local.function import LocalFunction -from sebs.local.storage import Minio +from sebs.storage.minio import Minio, MinioConfig from sebs.utils import serialize @@ -39,10 +39,11 @@ def deserialize(path: str, cache_client: Cache) -> "Deployment": deployment._inputs.append(input_cfg) for func in input_data["functions"]: deployment._functions.append(LocalFunction.deserialize(func)) - deployment._storage = Minio.deserialize(input_data["storage"], cache_client) + deployment._storage = Minio.deserialize( + MinioConfig.deserialize(input_data["storage"]), cache_client + ) return deployment def shutdown(self): for func in self._functions: func.stop() - self._storage.stop() diff --git a/sebs/local/function.py b/sebs/local/function.py index 8bf408be..169cb457 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -2,7 +2,7 @@ import docker import json -from sebs.faas.function import ExecutionResult, Function, Trigger +from sebs.faas.function import ExecutionResult, Function, FunctionConfig, Trigger class HTTPTrigger(Trigger): @@ -37,9 +37,15 @@ def deserialize(obj: dict) -> Trigger: class LocalFunction(Function): def __init__( - self, docker_container, port: int, name: str, benchmark: str, code_package_hash: str + self, + docker_container, + port: int, + name: str, + benchmark: str, + code_package_hash: str, + config: FunctionConfig, ): - super().__init__(benchmark, name, code_package_hash) + super().__init__(benchmark, name, code_package_hash, config) self._instance = docker_container self._instance_id = docker_container.id 
self._instance.reload() @@ -74,12 +80,14 @@ def deserialize(cached_config: dict) -> "LocalFunction": try: instance_id = cached_config["instance_id"] instance = docker.from_env().containers.get(instance_id) + cfg = FunctionConfig.deserialize(cached_config["config"]) return LocalFunction( instance, cached_config["port"], cached_config["name"], cached_config["benchmark"], cached_config["hash"], + cfg, ) except docker.errors.NotFound: raise RuntimeError(f"Cached container {instance_id} not available anymore!") diff --git a/sebs/local/local.py b/sebs/local/local.py index 216f0d41..ad18551e 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -4,14 +4,13 @@ import docker -# from sebs.local.minio import Minio from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers from sebs.local.config import LocalConfig -from sebs.local.storage import Minio +from sebs.storage.minio import Minio from sebs.local.function import LocalFunction -from sebs.faas.function import Function, ExecutionResult, Trigger +from sebs.faas.function import Function, FunctionConfig, ExecutionResult, Trigger from sebs.faas.storage import PersistentStorage from sebs.faas.system import System from sebs.benchmark import Benchmark @@ -45,14 +44,6 @@ def remove_containers(self) -> bool: def remove_containers(self, val: bool): self._remove_containers = val - @property - def shutdown_storage(self) -> bool: - return self._shutdown_storage - - @shutdown_storage.setter - def shutdown_storage(self, val: bool): - self._shutdown_storage = val - def __init__( self, sebs_config: SeBSConfig, @@ -64,9 +55,7 @@ def __init__( super().__init__(sebs_config, cache_client, docker_client) self.logging_handlers = logger_handlers self._config = config - self._storage_instance: Optional[Minio] = None self._remove_containers = True - self._shutdown_storage = True """ Create wrapper object for minio storage and fill buckets. @@ -79,23 +68,26 @@ def __init__( """ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: - if not self._storage_instance: - self._storage_instance = Minio( - self._docker_client, self._cache_client, replace_existing + if not hasattr(self, "storage"): + + if not self.config.resources.storage_config: + raise RuntimeError( + "The local deployment is missing the configuration of pre-allocated storage!" + ) + self.storage = Minio.deserialize( + self.config.resources.storage_config, self.cache_client ) - self._storage_instance.logging_handlers = self.logging_handlers - self._storage_instance.start() + self.storage.logging_handlers = self.logging_handlers else: - self._storage_instance.replace_existing = replace_existing - return self._storage_instance + self.storage.replace_existing = replace_existing + return self.storage """ Shut down minio storage instance. """ def shutdown(self): - if self._storage_instance and self.shutdown_storage: - self._storage_instance.stop() + pass """ It would be sufficient to just pack the code and ship it as zip to AWS. 
@@ -115,7 +107,14 @@ def shutdown(self): benchmark: benchmark name """ - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], @@ -138,34 +137,35 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu def create_function(self, code_package: Benchmark, func_name: str) -> "LocalFunction": - home_dir = os.path.join( - "/home", self._system_config.username(self.name(), code_package.language_name) - ) container_name = "{}:run.local.{}.{}".format( self._system_config.docker_repository(), code_package.language_name, code_package.language_version, ) environment: Dict[str, str] = {} - if self._storage_instance: + if self.config.resources.storage_config: environment = { - "MINIO_ADDRESS": self._storage_instance._url, - "MINIO_ACCESS_KEY": self._storage_instance._access_key, - "MINIO_SECRET_KEY": self._storage_instance._secret_key, + "MINIO_ADDRESS": self.config.resources.storage_config.address, + "MINIO_ACCESS_KEY": self.config.resources.storage_config.access_key, + "MINIO_SECRET_KEY": self.config.resources.storage_config.secret_key, + "CONTAINER_UID": str(os.getuid()), + "CONTAINER_GID": str(os.getgid()), + "CONTAINER_USER": self._system_config.username( + self.name(), code_package.language_name + ), } container = self._docker_client.containers.run( image=container_name, - command=f"python3 server.py {self.DEFAULT_PORT}", - volumes={ - code_package.code_location: {"bind": os.path.join(home_dir, "code"), "mode": "ro"} - }, + command=f"/bin/bash /sebs/run_server.sh {self.DEFAULT_PORT}", + volumes={code_package.code_location: {"bind": "/function", "mode": "ro"}}, environment=environment, # FIXME: make CPUs configurable + # FIXME: configure memory + # FIXME: configure timeout # cpuset_cpus=cpuset, # required to access perf counters # alternative: use custom seccomp profile privileged=True, - user=os.getuid(), security_opt=["seccomp:unconfined"], network_mode="bridge", # somehow removal of containers prevents checkpointing from working? 
@@ -175,8 +175,14 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LocalFunc detach=True, # tty=True, ) + function_cfg = FunctionConfig.from_benchmark(code_package) func = LocalFunction( - container, self.DEFAULT_PORT, func_name, code_package.benchmark, code_package.hash + container, + self.DEFAULT_PORT, + func_name, + code_package.benchmark, + code_package.hash, + function_cfg, ) self.logging.info( f"Started {func_name} function at container {container.id} , running on {func._url}" @@ -212,6 +218,10 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T def cached_function(self, function: Function): pass + def update_function_configuration(self, function: Function, code_package: Benchmark): + self.logging.error("Updating function configuration of local deployment is not supported") + raise RuntimeError("Updating function configuration of local deployment is not supported") + def download_metrics( self, function_name: str, @@ -229,9 +239,7 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) def default_function_name(code_package: Benchmark) -> str: # Create function name func_name = "{}-{}-{}".format( - code_package.benchmark, - code_package.language_name, - code_package.benchmark_config.memory, + code_package.benchmark, code_package.language_name, code_package.language_version ) return func_name diff --git a/sebs/openwhisk/__init__.py b/sebs/openwhisk/__init__.py new file mode 100644 index 00000000..614d9443 --- /dev/null +++ b/sebs/openwhisk/__init__.py @@ -0,0 +1,2 @@ +from .openwhisk import OpenWhisk # noqa +from .config import OpenWhiskConfig # noqa diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py new file mode 100644 index 00000000..dfaad3fc --- /dev/null +++ b/sebs/openwhisk/config.py @@ -0,0 +1,217 @@ +from sebs.cache import Cache +from sebs.faas.config import Credentials, Resources, Config +from sebs.utils import LoggingHandlers +from sebs.storage.config import MinioConfig + +from typing import cast, Optional + + +class OpenWhiskCredentials(Credentials): + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + return OpenWhiskCredentials() + + def serialize(self) -> dict: + return {} + + +class OpenWhiskResources(Resources): + def __init__( + self, + registry: Optional[str] = None, + username: Optional[str] = None, + password: Optional[str] = None, + registry_updated: bool = False, + ): + super().__init__() + self._docker_registry = registry if registry != "" else None + self._docker_username = username if username != "" else None + self._docker_password = password if password != "" else None + self._registry_updated = registry_updated + self._storage: Optional[MinioConfig] = None + self._storage_updated = False + + @staticmethod + def typename() -> str: + return "OpenWhisk.Resources" + + @property + def docker_registry(self) -> Optional[str]: + return self._docker_registry + + @property + def docker_username(self) -> Optional[str]: + return self._docker_username + + @property + def docker_password(self) -> Optional[str]: + return self._docker_password + + @property + def storage_config(self) -> Optional[MinioConfig]: + return self._storage + + @property + def storage_updated(self) -> bool: + return self._storage_updated + + @property + def registry_updated(self) -> bool: + return self._registry_updated + + @staticmethod + def initialize(dct: dict) -> Resources: + return OpenWhiskResources(dct["registry"], dct["username"], 
dct["password"]) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + + cached_config = cache.get_config("openwhisk") + ret: OpenWhiskResources + # Check for new config - overrides but check if it's different + if "docker_registry" in config: + ret = cast(OpenWhiskResources, OpenWhiskResources.initialize(config["docker_registry"])) + ret.logging.info("Using user-provided Docker registry for OpenWhisk.") + ret.logging_handlers = handlers + + # check if there has been an update + if not ( + cached_config + and "resources" in cached_config + and "docker" in cached_config["resources"] + and cached_config["resources"]["docker"] == config["docker_registry"] + ): + ret._registry_updated = True + + # Load cached values + elif ( + cached_config + and "resources" in cached_config + and "docker" in cached_config["resources"] + ): + ret = cast( + OpenWhiskResources, + OpenWhiskResources.initialize(cached_config["resources"]["docker"]), + ) + ret.logging_handlers = handlers + ret.logging.info("Using cached Docker registry for OpenWhisk") + else: + ret = OpenWhiskResources() + ret.logging.info("Using default Docker registry for OpenWhisk.") + ret.logging_handlers = handlers + ret._registry_updated = True + + # Check for new config + if "storage" in config: + ret._storage = MinioConfig.deserialize(config["storage"]) + ret.logging.info("Using user-provided configuration of storage for OpenWhisk.") + + # check if there has been an update + if not ( + cached_config + and "resources" in cached_config + and "storage" in cached_config["resources"] + and cached_config["resources"]["storage"] == config["storage"] + ): + ret.logging.info( + "User-provided configuration is different from cached storage, " + "we will update existing OpenWhisk actions." 
+ ) + ret._storage_updated = True + + # Load cached values + elif ( + cached_config + and "resources" in cached_config + and "storage" in cached_config["resources"] + ): + ret._storage = MinioConfig.deserialize(cached_config["resources"]["storage"]) + ret.logging.info("Using cached configuration of storage for OpenWhisk.") + + return ret + + def update_cache(self, cache: Cache): + cache.update_config( + val=self.docker_registry, keys=["openwhisk", "resources", "docker", "registry"] + ) + cache.update_config( + val=self.docker_username, keys=["openwhisk", "resources", "docker", "username"] + ) + cache.update_config( + val=self.docker_password, keys=["openwhisk", "resources", "docker", "password"] + ) + if self._storage: + self._storage.update_cache(["openwhisk", "resources", "storage"], cache) + + def serialize(self) -> dict: + out: dict = { + "docker_registry": self.docker_registry, + "docker_username": self.docker_username, + "docker_password": self.docker_password, + } + if self._storage: + out = {**out, "storage": self._storage.serialize()} + return out + + +class OpenWhiskConfig(Config): + name: str + shutdownStorage: bool + cache: Cache + + def __init__(self, config: dict, cache: Cache): + super().__init__() + self._credentials = OpenWhiskCredentials() + self._resources = OpenWhiskResources() + self.shutdownStorage = config["shutdownStorage"] + self.removeCluster = config["removeCluster"] + self.wsk_exec = config["wskExec"] + self.wsk_bypass_security = config["wskBypassSecurity"] + self.experimentalManifest = config["experimentalManifest"] + self.cache = cache + + @property + def credentials(self) -> OpenWhiskCredentials: + return self._credentials + + @property + def resources(self) -> OpenWhiskResources: + return self._resources + + @staticmethod + def initialize(cfg: Config, dct: dict): + pass + + def serialize(self) -> dict: + return { + "name": "openwhisk", + "shutdownStorage": self.shutdownStorage, + "removeCluster": self.removeCluster, + "wskExec": self.wsk_exec, + "wskBypassSecurity": self.wsk_bypass_security, + "experimentalManifest": self.experimentalManifest, + "credentials": self._credentials.serialize(), + "resources": self._resources.serialize(), + } + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + cached_config = cache.get_config("openwhisk") + resources = cast( + OpenWhiskResources, OpenWhiskResources.deserialize(config, cache, handlers) + ) + + res = OpenWhiskConfig(config, cached_config) + res.logging_handlers = handlers + res._resources = resources + return res + + def update_cache(self, cache: Cache): + cache.update_config(val=self.shutdownStorage, keys=["openwhisk", "shutdownStorage"]) + cache.update_config(val=self.removeCluster, keys=["openwhisk", "removeCluster"]) + cache.update_config(val=self.wsk_exec, keys=["openwhisk", "wskExec"]) + cache.update_config(val=self.wsk_bypass_security, keys=["openwhisk", "wskBypassSecurity"]) + cache.update_config( + val=self.experimentalManifest, keys=["openwhisk", "experimentalManifest"] + ) + self.resources.update_cache(cache) diff --git a/sebs/openwhisk/function.py b/sebs/openwhisk/function.py new file mode 100644 index 00000000..624b1250 --- /dev/null +++ b/sebs/openwhisk/function.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import cast, Optional +from dataclasses import dataclass + +from sebs.benchmark import Benchmark +from sebs.faas.function import Function, FunctionConfig, Runtime +from sebs.storage.config import MinioConfig + + 
+@dataclass +class OpenWhiskFunctionConfig(FunctionConfig): + + # FIXME: merge with higher level abstraction for images + docker_image: str = "" + namespace: str = "_" + storage: Optional[MinioConfig] = None + + @staticmethod + def deserialize(data: dict) -> OpenWhiskFunctionConfig: + keys = list(OpenWhiskFunctionConfig.__dataclass_fields__.keys()) + data = {k: v for k, v in data.items() if k in keys} + data["runtime"] = Runtime.deserialize(data["runtime"]) + data["storage"] = MinioConfig.deserialize(data["storage"]) + return OpenWhiskFunctionConfig(**data) + + def serialize(self) -> dict: + return self.__dict__ + + @staticmethod + def from_benchmark(benchmark: Benchmark) -> OpenWhiskFunctionConfig: + return super(OpenWhiskFunctionConfig, OpenWhiskFunctionConfig)._from_benchmark( + benchmark, OpenWhiskFunctionConfig + ) + + +class OpenWhiskFunction(Function): + def __init__( + self, name: str, benchmark: str, code_package_hash: str, cfg: OpenWhiskFunctionConfig + ): + super().__init__(benchmark, name, code_package_hash, cfg) + + @property + def config(self) -> OpenWhiskFunctionConfig: + return cast(OpenWhiskFunctionConfig, self._cfg) + + @staticmethod + def typename() -> str: + return "OpenWhisk.Function" + + def serialize(self) -> dict: + return {**super().serialize(), "config": self._cfg.serialize()} + + @staticmethod + def deserialize(cached_config: dict) -> OpenWhiskFunction: + from sebs.faas.function import Trigger + from sebs.openwhisk.triggers import LibraryTrigger, HTTPTrigger + + cfg = OpenWhiskFunctionConfig.deserialize(cached_config["config"]) + ret = OpenWhiskFunction( + cached_config["name"], cached_config["benchmark"], cached_config["hash"], cfg + ) + for trigger in cached_config["triggers"]: + trigger_type = cast( + Trigger, + {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + ) + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + ret.add_trigger(trigger_type.deserialize(trigger)) + return ret diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py new file mode 100644 index 00000000..0337a9bb --- /dev/null +++ b/sebs/openwhisk/openwhisk.py @@ -0,0 +1,444 @@ +import os +import shutil +import subprocess +from typing import cast, Dict, List, Tuple, Type + +import docker + +from sebs.benchmark import Benchmark +from sebs.cache import Cache +from sebs.faas import System, PersistentStorage +from sebs.faas.function import Function, ExecutionResult, Trigger +from sebs.openwhisk.storage import Minio +from sebs.openwhisk.triggers import LibraryTrigger, HTTPTrigger +from sebs.utils import PROJECT_DIR, LoggingHandlers, execute +from .config import OpenWhiskConfig +from .function import OpenWhiskFunction, OpenWhiskFunctionConfig +from ..config import SeBSConfig + + +class OpenWhisk(System): + _config: OpenWhiskConfig + + def __init__( + self, + system_config: SeBSConfig, + config: OpenWhiskConfig, + cache_client: Cache, + docker_client: docker.client, + logger_handlers: LoggingHandlers, + ): + super().__init__(system_config, cache_client, docker_client) + self._config = config + self.logging_handlers = logger_handlers + + if self.config.resources.docker_username: + if self.config.resources.docker_registry: + docker_client.login( + username=self.config.resources.docker_username, + password=self.config.resources.docker_password, + registry=self.config.resources.docker_registry, + ) + else: + docker_client.login( + username=self.config.resources.docker_username, + password=self.config.resources.docker_password, + ) + + @property + 
def config(self) -> OpenWhiskConfig: + return self._config + + def get_storage(self, replace_existing: bool = False) -> PersistentStorage: + if not hasattr(self, "storage"): + + if not self.config.resources.storage_config: + raise RuntimeError( + "OpenWhisk is missing the configuration of pre-allocated storage!" + ) + self.storage = Minio.deserialize( + self.config.resources.storage_config, self.cache_client + ) + self.storage.logging_handlers = self.logging_handlers + else: + self.storage.replace_existing = replace_existing + return self.storage + + def shutdown(self) -> None: + if hasattr(self, "storage") and self.config.shutdownStorage: + self.storage.stop() + if self.config.removeCluster: + from tools.openwhisk_preparation import delete_cluster # type: ignore + + delete_cluster() + super().shutdown() + + @staticmethod + def name() -> str: + return "openwhisk" + + @staticmethod + def typename(): + return "OpenWhisk" + + @staticmethod + def function_type() -> "Type[Function]": + return OpenWhiskFunction + + def get_wsk_cmd(self) -> List[str]: + cmd = [self.config.wsk_exec] + if self.config.wsk_bypass_security: + cmd.append("-i") + return cmd + + def find_image(self, repository_name, image_tag) -> bool: + + if self.config.experimentalManifest: + try: + # This requires enabling experimental Docker features + # Furthermore, it's not yet supported in the Python library + execute(f"docker manifest inspect {repository_name}:{image_tag}") + return True + except RuntimeError: + return False + else: + try: + # default version requires pulling for an image + self.docker_client.images.pull(repository=repository_name, tag=image_tag) + return True + except docker.errors.NotFound: + return False + + def build_base_image( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> bool: + """ + When building function for the first time (according to SeBS cache), + check if Docker image is available in the registry. + If yes, then skip building. + If no, then continue building. + + For every subsequent build, we rebuild image and push it to the + registry. These are triggered by users modifying code and enforcing + a build. + """ + + # We need to retag created images when pushing to registry other + # than default + registry_name = self.config.resources.docker_registry + repository_name = self.system_config.docker_repository() + image_tag = self.system_config.benchmark_image_tag( + self.name(), benchmark, language_name, language_version + ) + if registry_name is not None: + repository_name = f"{registry_name}/{repository_name}" + else: + registry_name = "Docker Hub" + + # Check if we the image is already in the registry. + if not is_cached: + if self.find_image(repository_name, image_tag): + self.logging.info( + f"Skipping building OpenWhisk Docker package for {benchmark}, using " + f"Docker image {repository_name}:{image_tag} from registry: " + f"{registry_name}." + ) + return False + else: + # image doesn't exist, let's continue + self.logging.info( + f"Image {repository_name}:{image_tag} doesn't exist in the registry, " + f"building OpenWhisk package for {benchmark}." 
+ ) + + build_dir = os.path.join(directory, "docker") + os.makedirs(build_dir) + shutil.copy( + os.path.join(PROJECT_DIR, "docker", self.name(), language_name, "Dockerfile.function"), + os.path.join(build_dir, "Dockerfile"), + ) + + for fn in os.listdir(directory): + if fn not in ("index.js", "__main__.py"): + file = os.path.join(directory, fn) + shutil.move(file, build_dir) + + with open(os.path.join(build_dir, ".dockerignore"), "w") as f: + f.write("Dockerfile") + + builder_image = self.system_config.benchmark_base_images(self.name(), language_name)[ + language_version + ] + self.logging.info(f"Build the benchmark base image {repository_name}:{image_tag}.") + + buildargs = {"VERSION": language_version, "BASE_IMAGE": builder_image} + image, _ = self.docker_client.images.build( + tag=f"{repository_name}:{image_tag}", path=build_dir, buildargs=buildargs + ) + + # Now push the image to the registry + # image will be located in a private repository + self.logging.info( + f"Push the benchmark base image {repository_name}:{image_tag} " + f"to registry: {registry_name}." + ) + ret = self.docker_client.images.push( + repository=repository_name, tag=image_tag, stream=True, decode=True + ) + # doesn't raise an exception for some reason + for val in ret: + if "error" in val: + self.logging.error(f"Failed to push the image to registry {registry_name}") + raise RuntimeError(val) + return True + + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: + + # Regardless of Docker image status, we need to create .zip file + # to allow registration of function with OpenWhisk + self.build_base_image(directory, language_name, language_version, benchmark, is_cached) + + # We deploy Minio config in code package since this depends on local + # deployment - it cannot be a part of Docker image + CONFIG_FILES = { + "python": ["__main__.py"], + "nodejs": ["index.js"], + } + package_config = CONFIG_FILES[language_name] + + benchmark_archive = os.path.join(directory, f"{benchmark}.zip") + subprocess.run( + ["zip", benchmark_archive] + package_config, stdout=subprocess.DEVNULL, cwd=directory + ) + self.logging.info(f"Created {benchmark_archive} archive") + bytes_size = os.path.getsize(benchmark_archive) + self.logging.info("Zip archive size {:.2f} MB".format(bytes_size / 1024.0 / 1024.0)) + return benchmark_archive, bytes_size + + def storage_arguments(self) -> List[str]: + storage = cast(Minio, self.get_storage()) + return [ + "-p", + "MINIO_STORAGE_SECRET_KEY", + storage.config.secret_key, + "-p", + "MINIO_STORAGE_ACCESS_KEY", + storage.config.access_key, + "-p", + "MINIO_STORAGE_CONNECTION_URL", + storage.config.address, + ] + + def create_function(self, code_package: Benchmark, func_name: str) -> "OpenWhiskFunction": + self.logging.info("Creating function as an action in OpenWhisk.") + try: + actions = subprocess.run( + [*self.get_wsk_cmd(), "action", "list"], + stderr=subprocess.DEVNULL, + stdout=subprocess.PIPE, + ) + + function_found = False + docker_image = "" + for line in actions.stdout.decode().split("\n"): + if line and func_name in line.split()[0]: + function_found = True + break + + function_cfg = OpenWhiskFunctionConfig.from_benchmark(code_package) + function_cfg.storage = cast(Minio, self.get_storage()).config + if function_found: + # docker image is overwritten by the update + res = OpenWhiskFunction( + func_name, code_package.benchmark, code_package.hash, function_cfg + ) + # Update function - we don't
know what version is stored + self.logging.info(f"Retrieved existing OpenWhisk action {func_name}.") + self.update_function(res, code_package) + else: + try: + self.logging.info(f"Creating new OpenWhisk action {func_name}") + docker_image = self.system_config.benchmark_image_name( + self.name(), + code_package.benchmark, + code_package.language_name, + code_package.language_version, + ) + subprocess.run( + [ + *self.get_wsk_cmd(), + "action", + "create", + func_name, + "--web", + "true", + "--docker", + docker_image, + "--memory", + str(code_package.benchmark_config.memory), + "--timeout", + str(code_package.benchmark_config.timeout * 1000), + *self.storage_arguments(), + code_package.code_location, + ], + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + check=True, + ) + function_cfg.docker_image = docker_image + res = OpenWhiskFunction( + func_name, code_package.benchmark, code_package.hash, function_cfg + ) + except subprocess.CalledProcessError as e: + self.logging.error(f"Cannot create action {func_name}.") + raise RuntimeError(e) + + except FileNotFoundError: + self.logging.error("Could not retrieve OpenWhisk functions - is path to wsk correct?") + raise RuntimeError("Failed to access wsk binary") + + # Add LibraryTrigger to a new function + trigger = LibraryTrigger(func_name, self.get_wsk_cmd()) + trigger.logging_handlers = self.logging_handlers + res.add_trigger(trigger) + + return res + + def update_function(self, function: Function, code_package: Benchmark): + self.logging.info(f"Update an existing OpenWhisk action {function.name}.") + function = cast(OpenWhiskFunction, function) + docker_image = self.system_config.benchmark_image_name( + self.name(), + code_package.benchmark, + code_package.language_name, + code_package.language_version, + ) + try: + subprocess.run( + [ + *self.get_wsk_cmd(), + "action", + "update", + function.name, + "--web", + "true", + "--docker", + docker_image, + "--memory", + str(code_package.benchmark_config.memory), + "--timeout", + str(code_package.benchmark_config.timeout * 1000), + *self.storage_arguments(), + code_package.code_location, + ], + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + check=True, + ) + function.config.docker_image = docker_image + + except FileNotFoundError as e: + self.logging.error("Could not update OpenWhisk function - is path to wsk correct?") + raise RuntimeError(e) + + def update_function_configuration(self, function: Function, code_package: Benchmark): + self.logging.info(f"Update configuration of an existing OpenWhisk action {function.name}.") + try: + subprocess.run( + [ + *self.get_wsk_cmd(), + "action", + "update", + function.name, + "--memory", + str(code_package.benchmark_config.memory), + "--timeout", + str(code_package.benchmark_config.timeout * 1000), + *self.storage_arguments(), + ], + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + check=True, + ) + except FileNotFoundError as e: + self.logging.error("Could not update OpenWhisk function - is path to wsk correct?") + raise RuntimeError(e) + + def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: + changed = super().is_configuration_changed(cached_function, benchmark) + + storage = cast(Minio, self.get_storage()) + function = cast(OpenWhiskFunction, cached_function) + # check if now we're using a new storage + if function.config.storage != storage.config: + self.logging.info( + "Updating function configuration due to changed storage configuration." 
+ ) + changed = True + function.config.storage = storage.config + + return changed + + def default_function_name(self, code_package: Benchmark) -> str: + return ( + f"{code_package.benchmark}-{code_package.language_name}-" + f"{code_package.language_version}" + ) + + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + raise NotImplementedError() + + def download_metrics( + self, + function_name: str, + start_time: int, + end_time: int, + requests: Dict[str, ExecutionResult], + metrics: dict, + ): + pass + + def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + if trigger_type == Trigger.TriggerType.LIBRARY: + return function.triggers(Trigger.TriggerType.LIBRARY)[0] + elif trigger_type == Trigger.TriggerType.HTTP: + try: + response = subprocess.run( + [*self.get_wsk_cmd(), "action", "get", function.name, "--url"], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + check=True, + ) + except FileNotFoundError as e: + self.logging.error( + "Could not retrieve OpenWhisk configuration - is path to wsk correct?" + ) + raise RuntimeError(e) + stdout = response.stdout.decode("utf-8") + url = stdout.strip().split("\n")[-1] + ".json" + trigger = HTTPTrigger(function.name, url) + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) + self.cache_client.update_function(function) + return trigger + else: + raise RuntimeError("Not supported!") + + def cached_function(self, function: Function): + for trigger in function.triggers(Trigger.TriggerType.LIBRARY): + trigger.logging_handlers = self.logging_handlers + cast(LibraryTrigger, trigger).wsk_cmd = self.get_wsk_cmd() + for trigger in function.triggers(Trigger.TriggerType.HTTP): + trigger.logging_handlers = self.logging_handlers diff --git a/sebs/openwhisk/storage.py b/sebs/openwhisk/storage.py new file mode 100644 index 00000000..d94182c4 --- /dev/null +++ b/sebs/openwhisk/storage.py @@ -0,0 +1,18 @@ +import docker + +from sebs.storage import minio +from sebs.storage.config import MinioConfig +from sebs.cache import Cache + + +class Minio(minio.Minio): + @staticmethod + def deployment_name() -> str: + return "openwhisk" + + def __init__(self, docker_client: docker.client, cache_client: Cache, replace_existing: bool): + super().__init__(docker_client, cache_client, replace_existing) + + @staticmethod + def deserialize(cached_config: MinioConfig, cache_client: Cache) -> "Minio": + return super(Minio, Minio)._deserialize(cached_config, cache_client, Minio) diff --git a/sebs/openwhisk/triggers.py b/sebs/openwhisk/triggers.py new file mode 100644 index 00000000..f0d8260b --- /dev/null +++ b/sebs/openwhisk/triggers.py @@ -0,0 +1,111 @@ +import concurrent.futures +import datetime +import json +import subprocess +from typing import Dict, List, Optional # noqa + +from sebs.faas.function import ExecutionResult, Trigger + + +class LibraryTrigger(Trigger): + def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None): + super().__init__() + self.fname = fname + if wsk_cmd: + self._wsk_cmd = [*wsk_cmd, "action", "invoke", "--result", self.fname] + + @staticmethod + def trigger_type() -> "Trigger.TriggerType": + return Trigger.TriggerType.LIBRARY + + @property + def wsk_cmd(self) -> List[str]: + assert self._wsk_cmd + return self._wsk_cmd + + @wsk_cmd.setter + def wsk_cmd(self, wsk_cmd: List[str]): + self._wsk_cmd = [*wsk_cmd, "action", "invoke", "--result", self.fname] + + @staticmethod + def get_command(payload: dict) -> List[str]: + params = [] + for key, 
value in payload.items(): + params.append("--param") + params.append(key) + params.append(json.dumps(value)) + return params + + def sync_invoke(self, payload: dict) -> ExecutionResult: + command = self.wsk_cmd + self.get_command(payload) + error = None + try: + begin = datetime.datetime.now() + response = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + check=True, + ) + end = datetime.datetime.now() + parsed_response = response.stdout.decode("utf-8") + except (subprocess.CalledProcessError, FileNotFoundError) as e: + end = datetime.datetime.now() + error = e + + openwhisk_result = ExecutionResult.from_times(begin, end) + if error is not None: + self.logging.error("Invocation of {} failed!".format(self.fname)) + openwhisk_result.stats.failure = True + return openwhisk_result + + return_content = json.loads(parsed_response) + openwhisk_result.parse_benchmark_output(return_content) + return openwhisk_result + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Library", "name": self.fname} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return LibraryTrigger(obj["name"]) + + @staticmethod + def typename() -> str: + return "OpenWhisk.LibraryTrigger" + + +class HTTPTrigger(Trigger): + def __init__(self, fname: str, url: str): + super().__init__() + self.fname = fname + self.url = url + + @staticmethod + def typename() -> str: + return "OpenWhisk.HTTPTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.HTTP + + def sync_invoke(self, payload: dict) -> ExecutionResult: + self.logging.debug(f"Invoke function {self.url}") + return self._http_invoke(payload, self.url, False) + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "HTTP", "fname": self.fname, "url": self.url} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return HTTPTrigger(obj["fname"], obj["url"]) diff --git a/sebs/sebs.py b/sebs/sebs.py index 4562c7bb..1a7aa65f 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -2,11 +2,14 @@ import docker +import sebs.storage +from sebs import types from sebs.local import Local from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.benchmark import Benchmark from sebs.faas.system import System as FaaSSystem +from sebs.faas.storage import PersistentStorage from sebs.faas.config import Config from sebs.utils import has_platform, LoggingHandlers, LoggingBase @@ -35,6 +38,10 @@ def verbose(self) -> bool: def logging_filename(self) -> Optional[str]: return self._logging_filename + @property + def config(self) -> SeBSConfig: + return self._config + def generate_logging_handlers(self, logging_filename: Optional[str] = None) -> LoggingHandlers: filename = logging_filename if logging_filename else self.logging_filename if filename in self._handlers: @@ -76,8 +83,8 @@ def get_deployment( deployment_config: Optional[Config] = None, ) -> FaaSSystem: name = config["name"] - implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} + if has_platform("aws"): from sebs.aws import AWS @@ -90,6 +97,10 @@ def get_deployment( from sebs.gcp import GCP implementations["gcp"] = GCP + if has_platform("openwhisk"): + from sebs.openwhisk 
import OpenWhisk + + implementations["openwhisk"] = OpenWhisk if name not in implementations: raise RuntimeError("Deployment {name} not supported!".format(name=name)) @@ -164,6 +175,20 @@ def get_benchmark( ) return benchmark + @staticmethod + def get_storage_implementation(storage_type: types.Storage) -> Type[PersistentStorage]: + _storage_implementations = {types.Storage.MINIO: sebs.storage.minio.Minio} + impl = _storage_implementations.get(storage_type) + assert impl + return impl + + @staticmethod + def get_storage_config_implementation(storage_type: types.Storage): + _storage_implementations = {types.Storage.MINIO: sebs.storage.config.MinioConfig} + impl = _storage_implementations.get(storage_type) + assert impl + return impl + def shutdown(self): self.cache_client.shutdown() diff --git a/sebs/storage/__init__.py b/sebs/storage/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sebs/storage/config.py b/sebs/storage/config.py new file mode 100644 index 00000000..8e0d6b8c --- /dev/null +++ b/sebs/storage/config.py @@ -0,0 +1,30 @@ +from typing import List + +from dataclasses import dataclass, field + +from sebs.cache import Cache + + +@dataclass +class MinioConfig: + address: str = "" + mapped_port: int = -1 + access_key: str = "" + secret_key: str = "" + instance_id: str = "" + input_buckets: List[str] = field(default_factory=list) + output_buckets: List[str] = field(default_factory=list) + type: str = "minio" + + def update_cache(self, path: List[str], cache: Cache): + for key in MinioConfig.__dataclass_fields__.keys(): + cache.update_config(val=getattr(self, key), keys=[*path, key]) + + @staticmethod + def deserialize(data: dict) -> "MinioConfig": + keys = list(MinioConfig.__dataclass_fields__.keys()) + data = {k: v for k, v in data.items() if k in keys} + return MinioConfig(**data) + + def serialize(self) -> dict: + return self.__dict__ diff --git a/sebs/local/storage.py b/sebs/storage/minio.py similarity index 52% rename from sebs/local/storage.py rename to sebs/storage/minio.py index c34f4c0d..6c79f05d 100644 --- a/sebs/local/storage.py +++ b/sebs/storage/minio.py @@ -2,23 +2,25 @@ import os import secrets import uuid -from typing import List, Optional +from typing import List, Optional, Type, TypeVar import docker import minio from sebs.cache import Cache -from ..faas.storage import PersistentStorage +from sebs.types import Storage as StorageTypes +from sebs.faas.storage import PersistentStorage +from sebs.storage.config import MinioConfig class Minio(PersistentStorage): @staticmethod def typename() -> str: - return "Local.Minio" + return f"{Minio.deployment_name()}.Minio" @staticmethod - def deployment_name(): - return "local" + def deployment_name() -> str: + return "minio" # the location does not matter MINIO_REGION = "us-east-1" @@ -26,29 +28,56 @@ def deployment_name(): def __init__(self, docker_client: docker.client, cache_client: Cache, replace_existing: bool): super().__init__(self.MINIO_REGION, cache_client, replace_existing) self._docker_client = docker_client - self._port = 9000 self._storage_container: Optional[docker.container] = None + self._cfg = MinioConfig() - def start(self): - self._access_key = secrets.token_urlsafe(32) - self._secret_key = secrets.token_hex(32) - self.logging.info("Minio storage ACCESS_KEY={}".format(self._access_key)) - self.logging.info("Minio storage SECRET_KEY={}".format(self._secret_key)) + @property + def config(self) -> MinioConfig: + return self._cfg + + @staticmethod + def _define_http_client(): + """ + Minio does 
not allow another way of configuring timeout for connection. + The rest of configuration is copied from source code of Minio. + """ + import urllib3 + from datetime import timedelta + + timeout = timedelta(seconds=1).seconds + + return urllib3.PoolManager( + timeout=urllib3.util.Timeout(connect=timeout, read=timeout), + maxsize=10, + retries=urllib3.Retry( + total=5, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504] + ), + ) + + def start(self, port: int = 9000): + + self._cfg.mapped_port = port + self._cfg.access_key = secrets.token_urlsafe(32) + self._cfg.secret_key = secrets.token_hex(32) + self._cfg.address = "" + self.logging.info("Minio storage ACCESS_KEY={}".format(self._cfg.access_key)) + self.logging.info("Minio storage SECRET_KEY={}".format(self._cfg.secret_key)) try: self._storage_container = self._docker_client.containers.run( "minio/minio:latest", command="server /data", - # ports={str(self._port): self._port}, network_mode="bridge", + ports={"9000": str(self._cfg.mapped_port)}, environment={ - "MINIO_ACCESS_KEY": self._access_key, - "MINIO_SECRET_KEY": self._secret_key, + "MINIO_ACCESS_KEY": self._cfg.access_key, + "MINIO_SECRET_KEY": self._cfg.secret_key, }, remove=True, stdout=True, stderr=True, detach=True, ) + self._cfg.instance_id = self._storage_container.id self.configure_connection() except docker.errors.APIError as e: self.logging.error("Starting Minio storage failed! Reason: {}".format(e)) @@ -59,31 +88,38 @@ def start(self): def configure_connection(self): # who knows why? otherwise attributes are not loaded - self._storage_container.reload() - networks = self._storage_container.attrs["NetworkSettings"]["Networks"] - self._url = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=self._port - ) - if not self._url: - self.logging.error( - f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" - ) - raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" + if self._cfg.address == "": + self._storage_container.reload() + networks = self._storage_container.attrs["NetworkSettings"]["Networks"] + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=networks["bridge"]["IPAddress"], Port=9000 ) - self.logging.info("Starting minio instance at {}".format(self._url)) + if not self._cfg.address: + self.logging.error( + f"Couldn't read the IP address of container from attributes " + f"{json.dumps(self._instance.attrs, indent=2)}" + ) + raise RuntimeError( + f"Incorrect detection of IP address for container with id {self._instance_id}" + ) + self.logging.info("Starting minio instance at {}".format(self._cfg.address)) self.connection = self.get_connection() def stop(self): if self._storage_container is not None: - self.logging.info("Stopping minio container at {url}".format(url=self._url)) + self.logging.info(f"Stopping minio container at {self._cfg.address}.") self._storage_container.stop() - self.logging.info("Stopped minio container at {url}".format(url=self._url)) + self.logging.info(f"Stopped minio container at {self._cfg.address}.") + else: + self.logging.error("Stopping minio was not succesful, storage container not known!") def get_connection(self): return minio.Minio( - self._url, access_key=self._access_key, secret_key=self._secret_key, secure=False + self._cfg.address, + access_key=self._cfg.access_key, + secret_key=self._cfg.secret_key, + secure=False, + http_client=Minio._define_http_client(), ) def _create_bucket(self, 
name: str, buckets: List[str] = []): @@ -145,10 +181,16 @@ def correct_name(self, name: str) -> str: def download(self, bucket_name: str, key: str, filepath: str): raise NotImplementedError() - def list_bucket(self, bucket_name: str): - objects_list = self.connection.list_objects(bucket_name) - objects: List[str] - return [obj.object_name for obj in objects_list] + def exists_bucket(self, bucket_name: str) -> bool: + return self.connection.bucket_exists(bucket_name) + + def list_bucket(self, bucket_name: str) -> List[str]: + try: + objects_list = self.connection.list_objects(bucket_name) + objects: List[str] + return [obj.object_name for obj in objects_list] + except minio.error.NoSuchBucket: + raise RuntimeError(f"Attempting to access a non-existing bucket {bucket_name}!") def list_buckets(self, bucket_name: str) -> List[str]: buckets = self.connection.list_buckets() @@ -158,30 +200,31 @@ def upload(self, bucket_name: str, filepath: str, key: str): raise NotImplementedError() def serialize(self) -> dict: - if self._storage_container is not None: - return { - "instance_id": self._storage_container.id, - "address": self._url, - "secret_key": self._secret_key, - "access_key": self._access_key, - "input": self.input_buckets, - "output": self.output_buckets, - } + return { + **self._cfg.serialize(), + "type": StorageTypes.MINIO, + } + + T = TypeVar("T", bound="Minio") + + @staticmethod + def _deserialize(cached_config: MinioConfig, cache_client: Cache, obj_type: Type[T]) -> T: + docker_client = docker.from_env() + obj = obj_type(docker_client, cache_client, False) + obj._cfg = cached_config + if cached_config.instance_id: + instance_id = cached_config.instance_id + try: + obj._storage_container = docker_client.containers.get(instance_id) + except docker.errors.NotFound: + raise RuntimeError(f"Storage container {instance_id} does not exist!") else: - return {} + obj._storage_container = None + obj.input_buckets = cached_config.input_buckets + obj.output_buckets = cached_config.output_buckets + obj.configure_connection() + return obj @staticmethod - def deserialize(cached_config: dict, cache_client: Cache) -> "Minio": - try: - instance_id = cached_config["instance_id"] - docker_client = docker.from_env() - obj = Minio(docker_client, cache_client, False) - obj._storage_container = docker_client.containers.get(instance_id) - obj._url = cached_config["address"] - obj._access_key = cached_config["access_key"] - obj._secret_key = cached_config["secret_key"] - obj.input_buckets = cached_config["input"] - obj.output_buckets = cached_config["output"] - return obj - except docker.errors.NotFound: - raise RuntimeError(f"Cached container {instance_id} not available anymore!") + def deserialize(cached_config: MinioConfig, cache_client: Cache) -> "Minio": + return Minio._deserialize(cached_config, cache_client, Minio) diff --git a/sebs/types.py b/sebs/types.py new file mode 100644 index 00000000..43574337 --- /dev/null +++ b/sebs/types.py @@ -0,0 +1,16 @@ +from enum import Enum + + +class Platforms(str, Enum): + AWS = ("aws",) + AZURE = ("azure",) + GCP = ("gcp",) + LOCAL = ("local",) + OPENWHISK = "openwhisk" + + +class Storage(str, Enum): + AWS_S3 = ("aws-s3",) + AZURE_BLOB_STORAGE = ("azure-blob-storage",) + GCP_STORAGE = ("google-cloud-storage",) + MINIO = "minio" diff --git a/sebs/utils.py b/sebs/utils.py index eff58511..f79278b3 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -186,3 +186,16 @@ def logging_handlers(self, handlers: LoggingHandlers): def has_platform(name: str) -> bool: return 
os.environ.get(f"SEBS_WITH_{name.upper()}", "False").lower() == "true" + + +def catch_interrupt(): + + import signal + import sys + import traceback + + def handler(x, y): + traceback.print_stack() + sys.exit(signal.SIGINT) + + signal.signal(signal.SIGINT, handler) diff --git a/sebs/version.py b/sebs/version.py new file mode 100644 index 00000000..6849410a --- /dev/null +++ b/sebs/version.py @@ -0,0 +1 @@ +__version__ = "1.1.0" diff --git a/templates/mycluster.yaml b/templates/mycluster.yaml new file mode 100644 index 00000000..24ca450a --- /dev/null +++ b/templates/mycluster.yaml @@ -0,0 +1,12 @@ +whisk: + ingress: + type: NodePort + apiHostName: {{ apiHost.name }} + apiHostPort: {{ apiHost.port }} + +invoker: + containerFactory: + impl: "kubernetes" + +nginx: + httpsNodePort: {{ apiHost.port }} diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 52ae9e9d..8f1eb320 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -4,91 +4,92 @@ import docker import json import os -import shutil PROJECT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.path.pardir) -DOCKER_DIR = os.path.join(PROJECT_DIR, 'docker') +DOCKER_DIR = os.path.join(PROJECT_DIR, "docker") -parser = argparse.ArgumentParser(description='Run local app experiments.') -parser.add_argument('--deployment', default=None, choices=['local', 'aws', 'azure', 'gcp'], action='store') -parser.add_argument('--type', default=None, choices=['build', 'run', 'manage'], action='store') -parser.add_argument('--language', default=None, choices=['python', 'nodejs'], action='store') +parser = argparse.ArgumentParser(description="Run local app experiments.") +parser.add_argument( + "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" +) +parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") +parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") args = parser.parse_args() -config = json.load(open(os.path.join(PROJECT_DIR, 'config', 'systems.json'), 'r')) +config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) client = docker.from_env() -def build(image_type, system, username, language=None,version=None, version_name=None): - msg = 'Build *{}* Dockerfile for *{}* system'.format(image_type, system) +def build(image_type, system, language=None, version=None, version_name=None): + + msg = "Build *{}* Dockerfile for *{}* system".format(image_type, system) if language: - msg += ' with language *' + language + '*' + msg += " with language *" + language + "*" if version: - msg += ' with version *' + version + '*' + msg += " with version *" + version + "*" print(msg) - dockerfile = os.path.join(PROJECT_DIR, 'docker', 'Dockerfile.{}.{}'.format(image_type, system)) + if language is not None: + dockerfile = os.path.join(PROJECT_DIR, "docker", system, language, f"Dockerfile.{image_type}") + else: + dockerfile = os.path.join(PROJECT_DIR, "docker", system, f"Dockerfile.{image_type}") target = f'{config["general"]["docker_repository"]}:{image_type}.{system}' if language: - dockerfile += '.' + language - target += '.' + language + target += "." + language if version: - target += '.' + version + target += "." 
+ version # if we pass an integer, the build will fail with 'connection reset by peer' - buildargs={ - 'USER': username, - 'VERSION': version, - 'UID': str(os.getuid()) + buildargs = { + "VERSION": version, } if version: - buildargs['BASE_IMAGE'] = version_name - print('Build img {} in {} from file {} with args {}'.format(target, PROJECT_DIR, dockerfile, buildargs)) - client.images.build( - path=PROJECT_DIR, - dockerfile=dockerfile, - buildargs=buildargs, - tag=target + buildargs["BASE_IMAGE"] = version_name + print( + "Build img {} in {} from file {} with args {}".format( + target, PROJECT_DIR, dockerfile, buildargs + ) ) + client.images.build(path=PROJECT_DIR, dockerfile=dockerfile, buildargs=buildargs, tag=target) + def build_language(system, language, language_config): - username = language_config['username'] configs = [] - if 'base_images' in language_config: - for version, base_image in language_config['base_images'].items(): + if "base_images" in language_config: + for version, base_image in language_config["base_images"].items(): configs.append([version, base_image]) else: configs.append([None, None]) for image in configs: if args.type is None: - for image_type in language_config['images']: - build(image_type, system, username, language, *image) + for image_type in language_config["images"]: + build(image_type, system, language, *image) else: - build(args.type, system, username, language, *image) + build(args.type, system, language, *image) + def build_systems(system, system_config): - if args.type == 'manage': - if 'images' in system_config: - build(args.type, system, system_config['images']['manage']['username']) + if args.type == "manage": + if "images" in system_config: + build(args.type, system) else: - print(f'Skipping manage image for {system}') + print(f"Skipping manage image for {system}") else: if args.language: - build_language(system, args.language, system_config['languages'][args.language]) + build_language(system, args.language, system_config["languages"][args.language]) else: - for language, language_dict in system_config['languages'].items(): + for language, language_dict in system_config["languages"].items(): build_language(system, language, language_dict) # Build additional types - if 'images' in system_config: - for image_type, image_config in system_config['images'].items(): - build(image_type, system, image_config['username']) + if "images" in system_config: + for image_type, image_config in system_config["images"].items(): + build(image_type, system) + if args.deployment is None: for system, system_dict in config.items(): - if system == 'general': + if system == "general": continue build_systems(system, system_dict) else: build_systems(args.deployment, config[args.deployment]) - - diff --git a/tools/openwhisk/couchdb-service.yaml b/tools/openwhisk/couchdb-service.yaml new file mode 100644 index 00000000..27abac77 --- /dev/null +++ b/tools/openwhisk/couchdb-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: access-couchdb + namespace: openwhisk +spec: + ports: + - name: access-couchdb + nodePort: 31201 + port: 5984 + protocol: TCP + targetPort: 5984 + selector: + name: owdev-couchdb + type: NodePort + diff --git a/tools/openwhisk/kind-cluster.yaml b/tools/openwhisk/kind-cluster.yaml new file mode 100644 index 00000000..8a8bb8f7 --- /dev/null +++ b/tools/openwhisk/kind-cluster.yaml @@ -0,0 +1,11 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane +- role: worker + extraPortMappings: + - hostPort: 
31201 + containerPort: 31201 + - hostPort: 31001 + containerPort: 31001 +- role: worker \ No newline at end of file diff --git a/tools/openwhisk/mycluster_template.yaml b/tools/openwhisk/mycluster_template.yaml new file mode 100644 index 00000000..42585620 --- /dev/null +++ b/tools/openwhisk/mycluster_template.yaml @@ -0,0 +1,12 @@ +whisk: + ingress: + type: NodePort + apiHostName: + apiHostPort: + +invoker: + containerFactory: + impl: "kubernetes" + +nginx: + httpsNodePort: \ No newline at end of file diff --git a/tools/openwhisk_preparation.py b/tools/openwhisk_preparation.py new file mode 100644 index 00000000..67ca4699 --- /dev/null +++ b/tools/openwhisk_preparation.py @@ -0,0 +1,353 @@ +#!/usr/bin/env python3 + +import logging +import os +import subprocess +import time +import yaml + + +# Common utils + + +def run_check_process(cmd: str, **kwargs) -> None: + env = os.environ.copy() + env = {**env, **kwargs} + + subprocess.run( + cmd.split(), + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=env, + ) + + +# helm utils + + +def install_helm() -> None: + try: + logging.info("Installing helm...") + helm_package = subprocess.run( + "curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3".split(), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + subprocess.run( + "sh -".split(), + input=helm_package.stdout, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True, + ) + logging.info("Helm has been installed") + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot install helm, reason: {}".format(e)) + exit(1) + + +def check_helm_installation() -> None: + try: + logging.info("Checking helm installation...") + run_check_process("helm version") + logging.info("helm is installed") + except (subprocess.CalledProcessError, FileNotFoundError): + logging.error("helm is not installed, attempting to install...") + install_helm() + + +# kubectl utils + + +def install_kubectl(kubectl_version: str = "v1.18.0") -> None: + try: + logging.info("Installing kubectl...") + home_path = os.environ["HOME"] + kubectl_path = "{}/.local/bin/kubectl".format(home_path) + run_check_process( + "curl -L -o {} " + "https://storage.googleapis.com/kubernetes-release/release/{}/bin" + "/linux/amd64/kubectl".format(kubectl_path, kubectl_version) + ) + run_check_process("chmod +x {}".format(kubectl_path)) + logging.info("Kubectl has been installed") + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot install kubectl, reason: {}".format(e)) + exit(1) + + +def check_kubectl_installation() -> None: + try: + logging.info("Checking kubectl installation...") + run_check_process("kubectl version --client=true") + logging.info("kubectl is installed") + except (subprocess.CalledProcessError, FileNotFoundError): + logging.error("Kubectl is not installed, attempting to install...") + install_kubectl() + + +# kind utils + + +def install_kind(kind_version: str = "v0.8.1") -> None: + try: + logging.info("Installing kind...") + env = os.environ.copy() + env["GO111MODULE"] = "on" + subprocess.run( + "go get sigs.k8s.io/kind@{}".format(kind_version).split(), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=env, + ) + logging.info("Kind has been installed") + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot install kind, reason: {}".format(e)) + exit(1) + + +def check_kind_installation() -> None: + try: + logging.info("Checking go 
installation...") + run_check_process("go version") + logging.info("go is installed") + try: + logging.info("Checking kind installation...") + run_check_process("kind version") + logging.info("kind is installed") + except (subprocess.CalledProcessError, FileNotFoundError): + logging.warning("Cannot find kind, proceeding with installation") + install_kind() + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot find go, reason: {}".format(e)) + exit(1) + + +def label_nodes() -> None: + def label_node(node: str, role: str) -> None: + run_check_process("kubectl label node {} openwhisk-role={}".format(node, role)) + + try: + logging.info("Labelling nodes") + label_node("kind-worker", "core") + label_node("kind-worker2", "invoker") + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot label nodes, reason: {}".format(e)) + exit(1) + + +def get_worker_ip(worker_node_name: str = "kind-worker") -> str: + try: + logging.info("Retrieving worker IP...") + internal_ip_proc = subprocess.run( + [ + "kubectl", + "get", + "node", + worker_node_name, + "-o", + "go-template='{{ (index .status.addresses 0).address }}'", + ], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + return internal_ip_proc.stdout.decode("utf-8").replace("'", "") + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot retrieve node IP, reason: {}".format(e)) + exit(1) + + +def create_kind_cluster() -> None: + try: + run_check_process("kind create cluster --config openwhisk/kind-cluster.yaml") + while True: + nodes = subprocess.run( + "kubectl get nodes".split(), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + node_grep = subprocess.run( + "grep kind".split(), + input=nodes.stdout, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + awk = subprocess.run( + ["awk", r"{print $2}"], + check=True, + input=node_grep.stdout, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + node_statuses = awk.stdout.decode("utf-8").split() + if all(node_status == "Ready" for node_status in node_statuses): + break + time.sleep(1) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.error("Cannot create kind cluster. 
Reason: {}".format(e))
+        exit(1)
+
+
+def check_kind_cluster() -> None:
+    try:
+        kind_clusters_process = subprocess.run(
+            "kind get clusters".split(),
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+        )
+        kind_clusters = set(kind_clusters_process.stdout.decode("utf-8").split())
+        if "kind" not in kind_clusters:
+            logging.info("Creating kind cluster...")
+            create_kind_cluster()
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error("Cannot check kind cluster, reason: {}".format(e))
+
+
+def delete_cluster():
+    try:
+        logging.info("Deleting KinD cluster...")
+        run_check_process("kind delete cluster")
+        logging.info("KinD cluster deleted...")
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error("Cannot delete cluster, reason: {}".format(e))
+
+
+# openwhisk deployment utils
+
+
+def prepare_wsk() -> None:
+    try:
+        ip = get_worker_ip()
+        # default OpenWhisk guest credentials
+        auth = "23bc46b1-71f6-4ed5-8c54-816aa4f8c502:123zO3xZCLrMN6v2BKK1dXYFpXlPkccOFqm12CdAsMgRU4VrNZ9lyGVCGuMDGIwP"
+        subprocess.run(
+            f"wsk property set --apihost {ip} --auth {auth}".split(),
+            check=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error(f"Cannot find wsk on system, reason: {e}")
+        exit(1)
+
+
+def expose_couchdb() -> None:
+    try:
+        run_check_process("kubectl apply -f openwhisk/couchdb-service.yaml")
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error("Cannot expose CouchDB, reason: {}".format(e))
+
+
+def clone_openwhisk_chart() -> None:
+    try:
+        run_check_process(
+            "git clone git@github.com:apache/openwhisk-deploy-kube.git /tmp/openwhisk-deploy-kube"
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error("Cannot clone openwhisk chart, reason: {}".format(e))
+
+
+def prepare_openwhisk_config() -> None:
+    worker_ip = get_worker_ip()
+    with open("openwhisk/mycluster_template.yaml", "r") as openwhisk_config_template:
+        data = yaml.unsafe_load(openwhisk_config_template)
+        data["whisk"]["ingress"]["apiHostName"] = worker_ip
+        data["whisk"]["ingress"]["apiHostPort"] = 31001
+        data["nginx"]["httpsNodePort"] = 31001
+    if not os.path.exists("/tmp/openwhisk-deploy-kube/mycluster.yaml"):
+        with open("/tmp/openwhisk-deploy-kube/mycluster.yaml", "a+") as openwhisk_config:
+            openwhisk_config.write(yaml.dump(data, default_flow_style=False))
+
+
+def deploy_openwhisk_on_k8s(namespace: str = "openwhisk") -> None:
+    try:
+        run_check_process(
+            "helm install owdev /tmp/openwhisk-deploy-kube/helm/openwhisk -n {} "
+            "--create-namespace -f "
+            "/tmp/openwhisk-deploy-kube/mycluster.yaml".format(namespace)
+        )
+        while True:
+            pods = subprocess.run(
+                "kubectl get pods -n {}".format(namespace).split(),
+                stderr=subprocess.DEVNULL,
+                stdout=subprocess.PIPE,
+            )
+            check_result = subprocess.run(
+                "grep install-packages".split(),
+                input=pods.stdout,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.DEVNULL,
+            )
+            # the install-packages pod might not exist yet right after helm install returns
+            status_fields = check_result.stdout.decode("utf-8").split()
+            if len(status_fields) > 2 and status_fields[2] == "Completed":
+                break
+
+            time.sleep(1)
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error("Cannot install openwhisk, reason: {}".format(e))
+        exit(1)
+
+
+def get_openwhisk_url() -> str:
+    ip = get_worker_ip()
+    return "{}:{}".format(ip, 31001)
+
+
+def get_couchdb_url() -> str:
+    ip = get_worker_ip()
+    return "{}:{}".format(ip, 31201)
+
+
+def install_wsk() -> None:
+    try:
+        logging.info("Installing wsk...")
+        home_path = os.environ["HOME"]
+        wsk_path = "{}/.local/bin/wsk".format(home_path)
+        subprocess.run("go get github.com/apache/openwhisk-cli".split())
+        run_check_process("go get -u github.com/jteeuwen/go-bindata/...")
+        installation_dir = "{}/src/github.com/apache/openwhisk-cli".format(os.environ["GOPATH"])
+
+        def custom_subprocess(command):
+            subprocess.run(command.split(), cwd=installation_dir, check=True)
+
+        custom_subprocess("go-bindata -pkg wski18n -o wski18n/i18n_resources.go wski18n/resources")
+        custom_subprocess("go build -o wsk")
+        run_check_process("ln -sf {}/wsk {}".format(installation_dir, wsk_path))
+        run_check_process("chmod +x {}".format(wsk_path))
+        logging.info("Wsk has been installed")
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.error("Cannot install wsk, reason: {}".format(e))
+        exit(1)
+
+
+def check_wsk_installation() -> None:
+    try:
+        logging.info("Checking wsk installation...")
+        run_check_process("wsk")
+        logging.info("Wsk is installed")
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        logging.info("Wsk is not installed, proceeding to install...")
+        install_wsk()
+
+
+# full setup
+
+
+def initiate_all():
+    check_kubectl_installation()
+    check_wsk_installation()
+    check_helm_installation()
+    check_kind_installation()
+    check_kind_cluster()
+    label_nodes()
+    clone_openwhisk_chart()
+    prepare_openwhisk_config()
+    deploy_openwhisk_on_k8s()
+    expose_couchdb()
+
+
+if __name__ == "__main__":
+    initiate_all()
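+
+
+# Note: initiate_all() performs the full local OpenWhisk setup in order: it
+# checks for (and installs, if missing) kubectl, wsk, helm, and kind; creates
+# the two-worker KinD cluster defined in openwhisk/kind-cluster.yaml; labels
+# the workers as OpenWhisk core and invoker nodes; clones
+# apache/openwhisk-deploy-kube to /tmp; renders mycluster.yaml with the worker
+# IP and node port 31001; deploys OpenWhisk via Helm; and exposes CouchDB on
+# node port 31201. Run the script from the tools/ directory
+# (python3 openwhisk_preparation.py), since the kind and CouchDB manifests are
+# referenced by relative paths.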