# Run notebooks 🚀 #115
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Work in progress.
#
# This workflow runs two jobs: List-Notebook-Files and Run-Notebooks.
# Run-Notebooks executes the .ipynb files with nbconvert on the Ubuntu
# runner and converts them to Python.
# (For now only the notebooks considered runnable are processed, since not
# all notebook files are ready.)
# It would also be better to build an image once instead of downloading
# the software on every run.
name: Run Notebooks on Ubuntu
run-name: Run notebooks 🚀

# See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows
on:
  # Manual trigger only.
  workflow_dispatch:
  # To trigger the workflow on pushes of .ipynb files, enable:
  # push:
  #   branches: [master, dev]
  #   paths:
  #     - '/*.ipynb'

# The Run-Notebooks job commits and pushes generated files, which needs
# write access to the repository contents.
permissions:
  contents: write
jobs:
  # Informational job: prints context about the run and lists the notebook
  # files found in the repository root.
  List-Notebook-Files:
    runs-on: ubuntu-latest
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
      - name: Check out repository code
        uses: actions/checkout@v4
      - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
      - run: echo "🖥️ The workflow is now ready to test your code on the runner."
      - name: List files in the repository
        run: |
          ls ${{ github.workspace }}/*.ipynb
      - run: echo "🍏 This job's status is ${{ job.status }}."

  # Executes the notebooks with nbconvert, converts each successfully
  # executed notebook to a .py file, and commits the results together with
  # a log (action_log.txt) back to the repository.
  Run-Notebooks:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # NOTE(review): later steps read requirements.txt and several other
          # notebooks; confirm that this sparse checkout still materializes
          # them in the workspace.
          sparse-checkout: |
            ngrams_with_pyspark.ipynb
            PySpark_On_Google_Colab.ipynb

      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.8'

      # Install requirements
      - name: Install requirements
        run: |
          python --version
          python -m pip install -r requirements.txt

      # Install Java (see: https://github.com/actions/setup-java)
      - name: Install Java 11
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'  # See 'Supported distributions' for available options
          java-version: '11'

      # Runs a set of commands using the runner's shell
      - name: Run Jupyter Notebooks testing
        run: |
          git pull
          git config user.name github-actions
          git config user.email github-actions@github.com
          date > action_log.txt
          echo "The following notebooks were successfully executed on ubuntu-latest:" >> action_log.txt
          set -e
          # just "runnable" notebooks
          # Not OK: Hadoop_single_node_cluster_setup_Python.ipynb cannot install ssh server
          #         MapReduce_Primer_HelloWorld.ipynb
          #         Apache_Sedona_with_PySpark.ipynb
          notebooks=(
            Spark_Standalone_Architecture_on_Google_Colab.ipynb
            Apache_Sedona_with_PySpark.ipynb
            GutenbergBooks.ipynb
            Hadoop_minicluster.ipynb
            Encoding+dataframe+columns.ipynb
            generate_data_with_Faker.ipynb
            ngrams_with_pyspark.ipynb
            PySpark_On_Google_Colab.ipynb
            Run_Spark_on_Google_Colab.ipynb
          )
          failed=0
          start=$(date +%s)
          for n in "${notebooks[@]}"
          do
            # The command is the `if` condition on purpose: under `set -e` a
            # bare failing command would abort the script before the failure
            # could be recorded in the log.
            if jupyter nbconvert --log-level=DEBUG --to notebook --execute "$n"; then
              echo "✅ $n" >> action_log.txt
              jupyter nbconvert --to python "$n"
              # Derive the generated file names from the notebook name
              # without its .ipynb extension.
              stem=$(basename "$n" .ipynb)
              file="$stem.py"
              executed="$stem.nbconvert.ipynb"
              echo "Python file: $file"
              if ! [ -f "$file" ]; then
                echo "File does not exist."
              else
                # Prepend a provenance header. The source is copied with
                # `cat` (not `echo -e`) so backslash sequences in the Python
                # code are left untouched.
                tmp=$(mktemp)
                {
                  printf '# This file was generated from %s with nbconvert\n' "$n"
                  printf '# Source: https://github.com/%s\n\n' "${{ github.repository }}"
                  cat "$file"
                } > "$tmp"
                cat "$tmp" > "$file"
                rm -f "$tmp"
                # push .py file
                git add "$file"
              fi
              # Stage the executed copy of the notebook if nbconvert wrote one.
              if [ -f "$executed" ]; then
                git add "$executed"
              fi
            else
              echo "❌ $n" >> action_log.txt
              failed=$((failed + 1))
            fi
          done
          end=$(date +%s)
          # Format a number of seconds as M:SS.
          sec2min() { printf "%d:%02d" "$((10#$1 / 60))" "$((10#$1 % 60))"; }
          runtime=$(sec2min $((end - start)))
          echo "Runtime: $runtime" >> action_log.txt
          # https://github.com/actions/checkout?tab=readme-ov-file#push-a-commit-using-the-built-in-token
          git add ./action_log.txt
          git commit -m "generated"
          git push
          # Report failures only after the log has been pushed, so the job is
          # still marked as failed when any notebook did not execute.
          if [ "$failed" -gt 0 ]; then
            echo "$failed notebook(s) failed to execute." >&2
            exit 1
          fi

      - name: List files currently in the workspace
        run: |
          ls ${{ github.workspace }}/*.ipynb ${{ github.workspace }}/*.py