From d217ffbfaecf6136ba18caf07508a1888909654f Mon Sep 17 00:00:00 2001 From: jfrery Date: Thu, 28 Sep 2023 15:26:45 +0200 Subject: [PATCH] chore: add run use case example to GH workflow --- .github/workflows/run_use_cases_examples.yaml | 126 ++++++++++ Makefile | 2 +- script/make_utils/run_use_case_examples.sh | 228 +++++++++--------- .../cifar/cifar_brevitas_training/Makefile | 7 +- use_case_examples/credit_scoring/Makefile | 12 + .../deployment/breast_cancer_builtin/Makefile | 17 -- .../deployment/cifar_8_bit/Makefile | 17 -- .../deployment/sentiment_analysis/Makefile | 17 -- use_case_examples/disease_prediction/Makefile | 12 + .../disease_prediction/requirements.txt | 1 + use_case_examples/federated_learning/Makefile | 14 ++ .../Makefile | 2 +- .../SentimentClassification.ipynb | 39 +-- use_case_examples/titanic/Makefile | 2 +- use_case_examples/titanic/download_data.sh | 13 - 15 files changed, 293 insertions(+), 216 deletions(-) create mode 100644 .github/workflows/run_use_cases_examples.yaml create mode 100644 use_case_examples/credit_scoring/Makefile delete mode 100644 use_case_examples/deployment/breast_cancer_builtin/Makefile delete mode 100644 use_case_examples/deployment/cifar_8_bit/Makefile delete mode 100644 use_case_examples/deployment/sentiment_analysis/Makefile create mode 100644 use_case_examples/disease_prediction/Makefile create mode 100644 use_case_examples/federated_learning/Makefile delete mode 100755 use_case_examples/titanic/download_data.sh diff --git a/.github/workflows/run_use_cases_examples.yaml b/.github/workflows/run_use_cases_examples.yaml new file mode 100644 index 000000000..7e0904943 --- /dev/null +++ b/.github/workflows/run_use_cases_examples.yaml @@ -0,0 +1,126 @@ +name: Run Use Case Examples +on: + push: + # workflow_dispatch: + # schedule: + # - cron: '0 0 * * MON' # Scheduled trigger every Monday at midnight + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + ACTION_RUN_URL: ${{ github.server_url 
}}/${{ github.repository }}/actions/runs/${{ github.run_id }} + +jobs: + start-runner-linux: + name: Start EC2 runner + runs-on: ubuntu-20.04 + outputs: + label-38: ${{ steps.start-ec2-runner-38.outputs.label }} + ec2-instance-id-38: ${{ steps.start-ec2-runner-38.outputs.ec2-instance-id || '' }} + steps: + - name: Checkout Code + uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@8c3f20df09ac63af7b3ae3d7c91f105f857d8497 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Start EC2 runner python 38 + id: start-ec2-runner-38 + uses: machulav/ec2-github-runner@4e0303de215db88e1c489e07a15ca4d867f488ea + with: + mode: start + github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }} + ec2-image-id: ${{ secrets.AWS_EC2_AMI }} + ec2-instance-type: "m6i.metal" + subnet-id: ${{ secrets.AWS_EC2_SUBNET_ID }} + security-group-id: ${{ secrets.AWS_EC2_SECURITY_GROUP_ID }} + + run-use-case-examples: + needs: [start-runner-linux] + runs-on: ${{ needs.start-runner-linux.outputs.label-38 }} + container: + image: ubuntu:20.04 + defaults: + run: + shell: bash + steps: + - name: Checkout Code + uses: actions/checkout@v2 + + - name: Set up Environment + run: | + # Setup commands if any, for example, installing dependencies, etc. 
+ apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y python3-venv make git git-lfs + + - name: Run Use Case Examples Script + run: | + chmod +x ./script/make_utils/run_use_case_examples.sh + ./script/make_utils/run_use_case_examples.sh + + stop-runner-linux: + name: Stop EC2 runner + needs: [run-use-case-examples, start-runner-linux] + runs-on: ubuntu-20.04 + if: ${{ always() && (needs.start-runner-linux.result != 'skipped') }} + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@8c3f20df09ac63af7b3ae3d7c91f105f857d8497 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Stop EC2 runner python 38 + uses: machulav/ec2-github-runner@4e0303de215db88e1c489e07a15ca4d867f488ea + if: ${{ always() && needs.start-runner-linux.outputs.ec2-instance-id-38 }} + with: + github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }} + label: ${{ needs.start-runner-linux.outputs.label-38 }} + ec2-instance-id: ${{ needs.start-runner-linux.outputs.ec2-instance-id-38 }} + mode: stop + + send-report: + if: ${{ always() }} + needs: + [ + start-runner-linux, + run-use-case-examples, + stop-runner-linux, + ] + name: Send Slack notification + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 + + - name: Prepare whole job status + if: ${{ always() }} + continue-on-error: true + env: + NEEDS_JSON: ${{ toJSON(needs) }} + run: | + echo "${NEEDS_JSON}" > /tmp/needs_context.json + JOB_STATUS=$(python3 ./script/actions_utils/actions_combine_status.py \ + --needs_context_json /tmp/needs_context.json) + echo "JOB_STATUS=${JOB_STATUS}" >> "$GITHUB_ENV" + + - name: Slack Notification + if: ${{ always() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8 + env: + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: 
https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_COLOR: ${{ env.JOB_STATUS || 'failure' }} + SLACK_MESSAGE: "Full run of use case examples finished with status ${{ env.JOB_STATUS || 'failure' }} \ + (${{ env.ACTION_RUN_URL }})\n\ + - start-runner-linux: ${{ needs.start-runner-linux.result || 'Did not run.'}}\n\n\ + - run-use-case-examples: ${{ needs.run-use-case-examples.result || 'Did not run.' }}\n\n\ + - stop-runner-linux: ${{ needs.stop-runner-linux.result || 'Did not run.'}}" + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} \ No newline at end of file diff --git a/Makefile b/Makefile index ce01b9e67..243bd4484 100644 --- a/Makefile +++ b/Makefile @@ -827,7 +827,7 @@ clean_sklearn_cache: .PHONY: run_one_use_case_example # Run one use-case example (USE_CASE, eg use_case_examples/hybrid_model) run_one_use_case_example: - ./script/make_utils/run_use_case_examples.sh + USE_CASE=$(USE_CASE) ./script/make_utils/run_use_case_examples.sh .PHONY: run_all_use_case_examples # Run all use-case examples run_all_use_case_examples: diff --git a/script/make_utils/run_use_case_examples.sh b/script/make_utils/run_use_case_examples.sh index 6461466f2..4bf17dfe6 100755 --- a/script/make_utils/run_use_case_examples.sh +++ b/script/make_utils/run_use_case_examples.sh @@ -1,129 +1,123 @@ #!/usr/bin/env bash set -e -DIR=$(dirname "$0") - -# shellcheck disable=SC1090,SC1091 -source "${DIR}/detect_docker.sh" - -if isDocker; then - echo "Can not run in docker -> this script needs to install new virtualenvs" - exit 1 -fi - - -CML_DIR=$(pwd) -USE_CASE_REL_DIR="use_case_examples" -USE_CASE_DIR="${CML_DIR}/${USE_CASE_REL_DIR}" - -if [ ! 
-d "$USE_CASE_DIR" ]; then - echo "This script must be run in the Concrete ML source root where the '$USE_CASE_REL_DIR' directory is present" - exit 1 -fi - -echo "Refreshing notebooks with PIP installed Concrete ML" - -# shellcheck disable=SC2143 -if [[ $(git ls-files --others --exclude-standard | grep ${USE_CASE_REL_DIR}) ]]; then - echo "This script must be run in a clean clone of the Concrete ML repo" - echo "This directory has untracked files in ${USE_CASE_REL_DIR}" - echo "You can LIST all untracked files using: " - echo - # shellcheck disable=SC2028 - echo " git ls-files --others --exclude-standard | grep ${USE_CASE_REL_DIR}" - echo - echo "You can REMOVE all untracked files using: " - echo - # shellcheck disable=SC2028 - echo " git ls-files --others --exclude-standard | grep ${USE_CASE_REL_DIR} | xargs -0 -d '\n' --no-run-if-empty rm" - echo - exit 1 -fi - -if [[ -z "${USE_CASE}" ]]; then - # shellcheck disable=SC2207 - LIST_OF_USE_CASES=($(find "$USE_CASE_DIR/" -mindepth 1 -maxdepth 2 -type d | grep -v checkpoints)) -else - LIST_OF_USE_CASES=("${USE_CASE}") - if [ ! -d "${USE_CASE}" ]; then - echo "The use case specified to be executed, ${USE_CASE}, does not exist" - exit 1 - fi -fi - -if [ ! "$(docker images -q zamafhe/concrete-ml:latest 2> /dev/null)" ]; then - # BUILD THE DOCKER IMAGE - echo "Building docker image" - poetry build && mkdir -p pkg && cp dist/* pkg/ && make release_docker - docker tag concrete-ml-release:latest zamafhe/concrete-ml:latest -fi - -# shellcheck disable=SC2068 -for EXAMPLE in ${LIST_OF_USE_CASES[@]} -do - EXAMPLE_NAME=$(basename "${EXAMPLE}") - - if [ -f "${EXAMPLE}/Makefile" ]; then - echo "*** Processing example ${EXAMPLE_NAME}" +CURRENT_DIR=$(pwd) +USE_CASE_DIR_NAME="use_case_examples" +USE_CASE_DIR="${CURRENT_DIR}/${USE_CASE_DIR_NAME}" + +export USE_CASE_DIR # Required for the Makefile of the use case examples + +check_directory_exists() { + if [ ! -d "$1" ]; then + echo "Error: Directory '${1}' not found." 
+ exit 1 + fi +} + +check_clean_git_status() { + if git ls-files --others --exclude-standard | grep -q "$1"; then + echo "Error: The repository is not clean. Untracked files found in $1." + echo "List untracked files with: git ls-files --others --exclude-standard | grep $1" + echo "Remove untracked files with: git clean -fdx $1" + exit 1 + fi +} + +setup_virtualenv() { + local venv_path="/tmp/virtualenv_$1" + echo "Setting up virtual environment in $venv_path..." + python3 -m venv "$venv_path" + source "${venv_path}/bin/activate" + echo "Virtual environment activated." +} + +install_concrete_ml() { + pip install -U pip setuptools wheel + if pip install -e .; then + echo "Concrete ML installation successful." else - continue + echo "Failed to install Concrete ML." + return 1 fi +} - # Setup a new venv - VENV_PATH="/tmp/virtualenv_${EXAMPLE_NAME}" - if [ -d "$VENV_PATH" ]; then - echo " - VirtualEnv already exists, deleting the old one" - rm -rf "$VENV_PATH" +install_requirements() { + if [ -f "${1:-.}/requirements.txt" ]; then + if pip install -r "${1:-.}/requirements.txt"; then + echo "Requirements installed successfully." + else + echo "Failed to install requirements." + return 1 + fi fi - virtualenv -q "$VENV_PATH" - echo " - VirtualEnv created at $VENV_PATH" - # shellcheck disable=SC1090,SC1091 - source "${VENV_PATH}/bin/activate" - # Install Concrete ML - set +e - cd "$CML_DIR" - pip install -e . &> "/tmp/log_cml_pip_${EXAMPLE_NAME}" - hresult=$? - if [ $hresult -ne 0 ]; then - echo "Could not install Concrete ML in the virtualenv, see /tmp/log_cml_pip_${EXAMPLE_NAME}" - rm -rf "$VENV_PATH" - continue +} + +run_example() { + local example_dir=$1 + local example_name=$(basename "$example_dir") + + if [ ! -f "${example_dir}/Makefile" ]; then + echo "No Makefile found in $example_dir, skipping..." 
+ return fi - set -e - echo " - Concrete ML installed in $VENV_PATH" - # Install example requirements - cd "$EXAMPLE" - if [ -f "requirements.txt" ]; then - set +e - pip install -r requirements.txt &> "/tmp/log_reqs_${EXAMPLE_NAME}" - hresult=$? - set -e - if [ $hresult -ne 0 ]; then - echo "Could not install Concrete ML in the virtualenv, see /tmp/log_reqs_${EXAMPLE_NAME}" - rm -rf "$VENV_PATH" - continue - fi - echo " - Requirements installed in $VENV_PATH" + echo "*** Running example: $example_name ***" + setup_virtualenv "$example_name" + local result=0 + # '||' keeps 'set -e' from aborting the whole run before the failure is recorded + install_concrete_ml && install_requirements "$example_dir" || result=$? + if [ "$result" -eq 0 ]; then + echo "Running use case example using Makefile..." + make -C "$example_dir" run_example || result=$? + fi + + if [ "$result" -ne 0 ]; then + echo "Failure in example $example_name." + failed_examples+=("$example_name") + else + echo "Successfully completed example $example_name." + success_examples+=("$example_name") fi - - set +e - # Strip colors from the error output before piping to the log files - # Swap stderr and stdout, all output of jupyter execution is in stderr - # The information about time spent running the notebook is in stdout - # The following will pipe the stderr to the regex so that it - # ends up in the log file. 
- # The timing shows in the terminal - USE_CASE_DIR=$USE_CASE_DIR make 3>&2 2>&1 1>&3- | perl -pe 's/\e([^\[\]]|\[.*?[a-zA-Z]|\].*?\a)//g' > "/tmp/log_${EXAMPLE_NAME}" - - # Neet to check the result of execution of the make command (ignore the results - # of the other commands in the pipe) - hresult="${PIPESTATUS[0]}" - if [ "$hresult" -ne 0 ]; then - echo "Error while running example ${EXAMPLE_NAME} see /tmp/log_${EXAMPLE_NAME}" + + deactivate + rm -rf "/tmp/virtualenv_$example_name" +} + +print_summary() { + echo "Summary of execution results:" + echo "Successful examples: ${#success_examples[@]}" + for example in "${success_examples[@]}"; do + echo " - $example" + done + echo "Failed examples: ${#failed_examples[@]}" + for example in "${failed_examples[@]}"; do + echo " - $example" + done +} + +main() { + check_directory_exists "$USE_CASE_DIR" + check_clean_git_status "$USE_CASE_DIR_NAME" + + declare -a success_examples + declare -a failed_examples + + local LIST_OF_USE_CASES=() + if [[ -z "${USE_CASE}" ]]; then + LIST_OF_USE_CASES=($(find "$USE_CASE_DIR/" -mindepth 1 -maxdepth 2 -type d | grep -v checkpoints | sort)) + else + LIST_OF_USE_CASES=("${USE_CASE_DIR}/${USE_CASE#"${USE_CASE_DIR_NAME}/"}") + fi + + for use_case in "${LIST_OF_USE_CASES[@]}"; do + run_example "$use_case" + done + + print_summary + + if [ ${#failed_examples[@]} -ne 0 ]; then + exit 1 fi - set -e +} - # Remove the virtualenv - rm -rf "$VENV_PATH" -done +main "$@" \ No newline at end of file diff --git a/use_case_examples/cifar/cifar_brevitas_training/Makefile b/use_case_examples/cifar/cifar_brevitas_training/Makefile index d222b84d0..26f0c96bb 100644 --- a/use_case_examples/cifar/cifar_brevitas_training/Makefile +++ b/use_case_examples/cifar/cifar_brevitas_training/Makefile @@ -2,11 +2,14 @@ export LC_ALL=en_US.UTF-8 export LANG=en_US.UTF-8 -EXAMPLE_NAME=cifar_brevitas_finetuning +EXAMPLE_NAME=cifar_brevitas_training JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute 
TIME_NB="${USE_CASE_DIR}/time_notebook_execution.sh" -run_example: one +run_example: one two one: @python evaluate_one_example_fhe.py + +two: + @python evaluate_torch_cml.py \ No newline at end of file diff --git a/use_case_examples/credit_scoring/Makefile b/use_case_examples/credit_scoring/Makefile new file mode 100644 index 000000000..60bc03f90 --- /dev/null +++ b/use_case_examples/credit_scoring/Makefile @@ -0,0 +1,12 @@ +# Useful for jupyter notebooks +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 + +EXAMPLE_NAME=credit_scoring +JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute +TIME_NB="${USE_CASE_DIR}/time_notebook_execution.sh" + +run_example: one + +one: + @$(TIME_NB) CreditScoring.ipynb diff --git a/use_case_examples/deployment/breast_cancer_builtin/Makefile b/use_case_examples/deployment/breast_cancer_builtin/Makefile deleted file mode 100644 index 4a22f850f..000000000 --- a/use_case_examples/deployment/breast_cancer_builtin/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# Useful for jupyter notebooks -export LC_ALL=en_US.UTF-8 -export LANG=en_US.UTF-8 - -EXAMPLE_NAME=cifar_brevitas_finetuning -JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute - -run_example: three - -one: - @./train_with_docker.sh - -two: one - @python -m concrete.ml.deployment.deploy_to_docker --only-build - -three: two - @python build_docker_client_image.py diff --git a/use_case_examples/deployment/cifar_8_bit/Makefile b/use_case_examples/deployment/cifar_8_bit/Makefile deleted file mode 100644 index ad1c98316..000000000 --- a/use_case_examples/deployment/cifar_8_bit/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# Useful for jupyter notebooks -export LC_ALL=en_US.UTF-8 -export LANG=en_US.UTF-8 - -EXAMPLE_NAME=cifar_brevitas_finetuning -JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute - -run_example: three - -one: - @python compile_with_docker.py - -two: one - @python -m concrete.ml.deployment.deploy_to_docker --only-build - -three: two - @docker 
build --tag cifar_client -f Dockerfile.client . diff --git a/use_case_examples/deployment/sentiment_analysis/Makefile b/use_case_examples/deployment/sentiment_analysis/Makefile deleted file mode 100644 index 4a22f850f..000000000 --- a/use_case_examples/deployment/sentiment_analysis/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# Useful for jupyter notebooks -export LC_ALL=en_US.UTF-8 -export LANG=en_US.UTF-8 - -EXAMPLE_NAME=cifar_brevitas_finetuning -JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute - -run_example: three - -one: - @./train_with_docker.sh - -two: one - @python -m concrete.ml.deployment.deploy_to_docker --only-build - -three: two - @python build_docker_client_image.py diff --git a/use_case_examples/disease_prediction/Makefile b/use_case_examples/disease_prediction/Makefile new file mode 100644 index 000000000..f9c290c6f --- /dev/null +++ b/use_case_examples/disease_prediction/Makefile @@ -0,0 +1,12 @@ +# Useful for jupyter notebooks +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 + +EXAMPLE_NAME=disease_prediction +JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute +TIME_NB="${USE_CASE_DIR}/time_notebook_execution.sh" + +run_example: one + +one: + @$(TIME_NB) HealthCarePrediction.ipynb diff --git a/use_case_examples/disease_prediction/requirements.txt b/use_case_examples/disease_prediction/requirements.txt index 56b78ae9f..1709ea8a1 100644 --- a/use_case_examples/disease_prediction/requirements.txt +++ b/use_case_examples/disease_prediction/requirements.txt @@ -1,3 +1,4 @@ concrete-ml jupyter pandas +matplotlib diff --git a/use_case_examples/federated_learning/Makefile b/use_case_examples/federated_learning/Makefile new file mode 100644 index 000000000..6a5f3bd9b --- /dev/null +++ b/use_case_examples/federated_learning/Makefile @@ -0,0 +1,14 @@ +# Useful for jupyter notebooks +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 + +EXAMPLE_NAME=federated_learning +PYTHON_RUN=python + +run_example: run_sh load_to_cml + +run_sh: 
+ @./run.sh + +load_to_cml: + @$(PYTHON_RUN) load_to_cml.py \ No newline at end of file diff --git a/use_case_examples/sentiment_analysis_with_transformer/Makefile b/use_case_examples/sentiment_analysis_with_transformer/Makefile index 3340332bc..dde11d706 100644 --- a/use_case_examples/sentiment_analysis_with_transformer/Makefile +++ b/use_case_examples/sentiment_analysis_with_transformer/Makefile @@ -2,7 +2,7 @@ export LC_ALL=en_US.UTF-8 export LANG=en_US.UTF-8 -EXAMPLE_NAME=cifar_brevitas_finetuning +EXAMPLE_NAME=sentiment_analysis_with_transformers JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute TIME_NB="${USE_CASE_DIR}/time_notebook_execution.sh" diff --git a/use_case_examples/sentiment_analysis_with_transformer/SentimentClassification.ipynb b/use_case_examples/sentiment_analysis_with_transformer/SentimentClassification.ipynb index dc9774336..864c64cd7 100644 --- a/use_case_examples/sentiment_analysis_with_transformer/SentimentClassification.ipynb +++ b/use_case_examples/sentiment_analysis_with_transformer/SentimentClassification.ipynb @@ -34,11 +34,11 @@ ], "source": [ "# Import the required packages\n", - "import os\n", "import time\n", "\n", "import numpy\n", "import pandas as pd\n", + "import requests\n", "from sklearn.metrics import average_precision_score\n", "from sklearn.model_selection import GridSearchCV, train_test_split\n", "\n", @@ -49,32 +49,15 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-04-06 09:49:38-- https://huggingface.co/datasets/osanseviero/twitter-airline-sentiment/resolve/main/Tweets.csv\r\n", - "Resolving huggingface.co (huggingface.co)... 13.32.145.73, 13.32.145.5, 13.32.145.11, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|13.32.145.73|:443... connected.\r\n", - "HTTP request sent, awaiting response... 
302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/ce/37/ce37f07d6007921b5a0a814bd1cb03df4a2fa91f9631a025317f3a3e4acbe83c/ea94b23f41892b290dec3330bb8cf9cb6b8bc669eaae5f3a84c40f7b0de8f15e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Tweets.csv%3B+filename%3D%22Tweets.csv%22%3B&response-content-type=text%2Fcsv&Expires=1681033779&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZG4tbGZzLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2NlLzM3L2NlMzdmMDdkNjAwNzkyMWI1YTBhODE0YmQxY2IwM2RmNGEyZmE5MWY5NjMxYTAyNTMxN2YzYTNlNGFjYmU4M2MvZWE5NGIyM2Y0MTg5MmIyOTBkZWMzMzMwYmI4Y2Y5Y2I2YjhiYzY2OWVhYWU1ZjNhODRjNDBmN2IwZGU4ZjE1ZT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE2ODEwMzM3Nzl9fX1dfQ__&Signature=PvnA1Xq05ghV1ztqtUf4EEQ4HtueYlPV9xuL6bydyg%7EC2IAAq1DcepkTws%7EsMKD8xiSQWxGQBWq7QbrOr0bYwKSD4ACUYBRJ6I4iEeiI3%7EjljnlGjqDKB7khTyrkU3s8oGy6wMKRSWHORpeoiXgBHf21QVghWo5Fl-mA0NhejTQ8gDsH4rowJHzC4k77zUxY0aMuhpHQnbJlTFlYODxjHy%7ELUNL2xUJQkQymsbiCV2dmYkslpIJUdKNj5d4r7jbvfpvZ63In340WF34Ym69%7E5XHNe8v6t1Qy4fOwxg62Qe3CbNlh0Sp9ZNS48%7EZ23az9qevO2CRoSGAsBE3mmS2vCA__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-04-06 09:49:38-- 
https://cdn-lfs.huggingface.co/repos/ce/37/ce37f07d6007921b5a0a814bd1cb03df4a2fa91f9631a025317f3a3e4acbe83c/ea94b23f41892b290dec3330bb8cf9cb6b8bc669eaae5f3a84c40f7b0de8f15e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Tweets.csv%3B+filename%3D%22Tweets.csv%22%3B&response-content-type=text%2Fcsv&Expires=1681033779&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZG4tbGZzLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2NlLzM3L2NlMzdmMDdkNjAwNzkyMWI1YTBhODE0YmQxY2IwM2RmNGEyZmE5MWY5NjMxYTAyNTMxN2YzYTNlNGFjYmU4M2MvZWE5NGIyM2Y0MTg5MmIyOTBkZWMzMzMwYmI4Y2Y5Y2I2YjhiYzY2OWVhYWU1ZjNhODRjNDBmN2IwZGU4ZjE1ZT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE2ODEwMzM3Nzl9fX1dfQ__&Signature=PvnA1Xq05ghV1ztqtUf4EEQ4HtueYlPV9xuL6bydyg%7EC2IAAq1DcepkTws%7EsMKD8xiSQWxGQBWq7QbrOr0bYwKSD4ACUYBRJ6I4iEeiI3%7EjljnlGjqDKB7khTyrkU3s8oGy6wMKRSWHORpeoiXgBHf21QVghWo5Fl-mA0NhejTQ8gDsH4rowJHzC4k77zUxY0aMuhpHQnbJlTFlYODxjHy%7ELUNL2xUJQkQymsbiCV2dmYkslpIJUdKNj5d4r7jbvfpvZ63In340WF34Ym69%7E5XHNe8v6t1Qy4fOwxg62Qe3CbNlh0Sp9ZNS48%7EZ23az9qevO2CRoSGAsBE3mmS2vCA__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 52.222.174.26, 52.222.174.3, 52.222.174.32, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|52.222.174.26|:443... connected.\r\n", - "HTTP request sent, awaiting response... 
200 OK\r\n", - "Length: 3421431 (3.3M) [text/csv]\r\n", - "Saving to: ‘Tweets.csv’\r\n", - "\r\n", - "Tweets.csv 100%[===================>] 3.26M --.-KB/s in 0.02s \r\n", - "\r\n", - "2023-04-06 09:49:39 (146 MB/s) - ‘Tweets.csv’ saved [3421431/3421431]\r\n", - "\r\n" - ] - } - ], + "outputs": [], "source": [ - "!wget -O Tweets.csv https://huggingface.co/datasets/osanseviero/twitter-airline-sentiment/resolve/main/Tweets.csv" + "url = (\n", + " \"https://huggingface.co/datasets/osanseviero/twitter-airline-sentiment/resolve/main/Tweets.csv\"\n", + ")\n", + "response = requests.get(url)\n", + "\n", + "with open(\"Tweets.csv\", \"wb\") as file:\n", + " file.write(response.content)" ] }, { @@ -93,10 +76,6 @@ } ], "source": [ - "# Download the data-sets\n", - "if not os.path.isfile(\"Tweets.csv\"):\n", - " raise ValueError(\"Please launch the `download_data.sh` script in order to get the data-sets.\")\n", - "\n", "train = pd.read_csv(\"Tweets.csv\", index_col=0)\n", "text_X = train[\"text\"]\n", "y = train[\"airline_sentiment\"]\n", diff --git a/use_case_examples/titanic/Makefile b/use_case_examples/titanic/Makefile index 9753f0880..71556d257 100644 --- a/use_case_examples/titanic/Makefile +++ b/use_case_examples/titanic/Makefile @@ -2,7 +2,7 @@ export LC_ALL=en_US.UTF-8 export LANG=en_US.UTF-8 -EXAMPLE_NAME=cifar_brevitas_finetuning +EXAMPLE_NAME=titanic JUPYTER_RUN=jupyter nbconvert --to notebook --inplace --execute TIME_NB="${USE_CASE_DIR}/time_notebook_execution.sh" diff --git a/use_case_examples/titanic/download_data.sh b/use_case_examples/titanic/download_data.sh deleted file mode 100755 index abbd4ce87..000000000 --- a/use_case_examples/titanic/download_data.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -set -e - -# You need to have a valid ~/.kaggle/kaggle.json, that you can generate from "Create new API token" -# on your account page in kaggle.com -rm -rf local_datasets -mkdir local_datasets -cd local_datasets - -kaggle competitions download -c 
titanic - -unzip titanic.zip -d titanic