Skip to content

Commit

Permalink
Merge pull request #85 from godatadriven/84-make-it-easier-to-install…
Browse files Browse the repository at this point in the history
…-and-use-whirl

84 make it easier to install and use whirl
  • Loading branch information
krisgeus authored Dec 23, 2022
2 parents b8ad478 + 2315b38 commit 4ff008b
Show file tree
Hide file tree
Showing 18 changed files with 159 additions and 34 deletions.
13 changes: 12 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
* text=auto
*.sh text eol=lf
*.bat text eol=crlf
*.cmd text eol=crlf
*.cmd text eol=crlf

.gitattributes export-ignore
.gitignore export-ignore
.github export-ignore
.gitkeep export-ignore
.editorconfig export-ignore
logo export-ignore

#Non finished examples and their envs
examples/dbt** export-ignore
envs/dbt** export-ignore
65 changes: 52 additions & 13 deletions .github/workflows/whirl-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,20 @@ jobs:
steps:
- uses: actions/checkout@v2
- id: setdirs # Give it an id to handle to get step outputs in the outputs key above
run: echo "::set-output name=dir::$(ls -d ./examples/* | jq -R -s -c 'split("\n")[:-1]')"
# Define step output named dir base on ls command transformed to JSON thanks to jq
# run: echo "::set-output name=dir::$(ls -d ./examples/* | jq -R -s -c 'split("\n")[:-1]')"
run: echo "dir=$(ls -d ./examples/* | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
# Define step output named dir based on ls command transformed to JSON thanks to jq

examples: # Job that lists the subdirectories of ./examples
runs-on: ubuntu-latest
outputs:
# Expose the inner step output as the job output named example
example: ${{ steps.setexamples.outputs.example }}
steps:
- uses: actions/checkout@v2
- id: setexamples # Give the step an id so its outputs can be referenced in the outputs key above
run: echo "example=$(ls -d ./examples/* | sed -r 's/\.\/examples\/(.*)/\1/g' | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
# Define the step output named example: the ls listing with the ./examples/ prefix stripped by sed, turned into a JSON array by jq

whirl-ci-default-envs:
needs: [directories]
Expand All @@ -34,27 +46,54 @@ jobs:
fail-fast: false
max-parallel: 4
matrix:
example: ${{ fromJson(needs.directories.outputs.dir) }}
example_dir: ${{ fromJson(needs.directories.outputs.dir) }}
python_version: ["3.8", "3.9"]
airflow_version: ["2.2.5", "2.3.2"]
airflow_version: ["2.2.5", "2.5.0"]
exclude:
# Needs more memory than available on the runner
- example: ./examples/dbt-spark-example
- example: ./examples/spark-delta-sharing
- example: ./examples/spark-s3-to-hive
- example_dir: ./examples/dbt-spark-example
- example_dir: ./examples/spark-delta-sharing
- example_dir: ./examples/spark-s3-to-hive
# Exclude failing dbt runs
- example: ./examples/dbt-example
- example_dir: ./examples/dbt-example
env:
PYTHON_VERSION: ${{ matrix.python_version }}
AIRFLOW_VERSION: ${{ matrix.airflow_version }}
steps:
- uses: actions/checkout@v2
- name: Run whirl CI ${{ matrix.example }}
working-directory: ${{ matrix.example }}
- name: Run whirl CI ${{ matrix.example_dir }}
working-directory: ${{ matrix.example_dir }}
run: |
echo Run Ci for example ${{ matrix.example }}
echo Run Ci from example directory ${{ matrix.example_dir }}
../../whirl ci
whirl-ci-default-envs-from-root-dir:
needs: [examples]
runs-on: ubuntu-latest
strategy:
fail-fast: false
max-parallel: 4
matrix:
example: ${{ fromJson(needs.examples.outputs.example) }}
python_version: ["3.8", "3.9"]
airflow_version: ["2.2.5", "2.5.0"]
exclude:
# Needs more memory than available on the runner
- example: dbt-spark-example
- example: spark-delta-sharing
- example: spark-s3-to-hive
# Exclude failing dbt runs
- example: dbt-example
env:
PYTHON_VERSION: ${{ matrix.python_version }}
AIRFLOW_VERSION: ${{ matrix.airflow_version }}
steps:
- uses: actions/checkout@v2
- name: Run whirl CI example ${{ matrix.example }}
run: |
echo Run Ci for example ${{ matrix.example }}
./whirl -x ${{ matrix.example }} ci
whirl-ci-extra-env-spark-s3-to-postgres:
runs-on: ubuntu-latest
Expand All @@ -72,7 +111,7 @@ jobs:
max-parallel: 4
matrix:
python_version: ["3.8", "3.9"]
airflow_version: ["2.2.5", "2.3.2"]
airflow_version: ["2.2.5", "2.5.0"]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.python_version }}
Expand All @@ -91,7 +130,7 @@ jobs:
max-parallel: 4
matrix:
python_version: ["3.8", "3.9"]
airflow_version: ["2.2.5", "2.3.2"]
airflow_version: ["2.2.5", "2.5.0"]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.python_version }}
Expand Down
21 changes: 21 additions & 0 deletions .github/workflows/whirl-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Builds a tar.gz archive of the repository with git-archive and attaches it
# to the GitHub release that triggered this workflow.
name: Publish minimal release archive
on:
  release:
    types: [published]

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # for upload release asset
    steps:
      - uses: actions/checkout@v2

      # git-archive honours the export-ignore attributes from .gitattributes,
      # so paths marked there are left out of the archive.
      - name: Run git-archive command to create a release artifact
        run: git archive --format=tar.gz --prefix=whirl/ --output=whirl-release.tar.gz HEAD

      - name: upload Whirl release artifact
        env:
          GITHUB_TOKEN: ${{ github.token }}
          # Pass the tag through the environment instead of interpolating the
          # expression into the script, so a tag name containing shell
          # metacharacters cannot inject commands.
          RELEASE_TAG: ${{ github.event.release.tag_name }}
        run: |
          gh release upload "${RELEASE_TAG}" whirl-release.tar.gz --clobber
38 changes: 32 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ NOTE: _whirl_ is not intended to replace proper (unit) testing of the logic you

_whirl_ relies on [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/). Make sure you have it installed. If using _Docker for Mac_ or _Windows_ ensure that you have configured it with sufficient RAM (8GB or more recommended) for running all your containers.

When you want to use _whirl_ in your CI pipeline (currently work in progress), you need to have `jq` installed. For example, with Homebrew:
When you want to use _whirl_ in your CI pipeline, you need to have `jq` installed. For example, with Homebrew:

```bash
brew install jq
Expand All @@ -28,6 +28,8 @@ As of January 2021, Whirl uses Airflow 2.x.x as the default version. A specific

## Getting Started

### Development

Clone this repository:

```
Expand All @@ -38,6 +40,21 @@ For ease of use you can add the base directory to your `PATH` environment variab
export PATH=<target directory of whirl>:${PATH}
```

### Use the release

Download the [latest Whirl release artifact](https://github.com/godatadriven/whirl/releases/latest/download/whirl-release.tar.gz).

Extract the file (for example into `/usr/local/opt`)

```bash
tar -xvzf whirl-release.tar.gz -C /usr/local/opt
```

Make sure the whirl script is available on your path:
```bash
export PATH=/usr/local/opt/whirl:$PATH
```

## Usage

The `whirl` script is used to perform all actions.
Expand All @@ -51,18 +68,22 @@ $ whirl --help

#### Starting whirl

The default action is to start the DAG in your current directory. It expects an environment to be configured. You can pass this as a command line argument or you can configure it in a `.whirl.env` file. (See [Configuring environment variables](#configuring-environment-variables).) The environment refers to a directory with the same name in the `envs` directory located near the _whirl_ script.
The default action is to start the DAG in your current directory.

With the `[-x example]` command-line argument you can run whirl from anywhere and tell whirl which example DAG to run. The example refers to a directory with the same name in the `examples` directory located near the _whirl_ script.

Whirl expects an environment to be configured. You can pass this as a command-line argument `[-e environment]` or you can configure it as the environment variable `WHIRL_ENVIRONMENT` in a `.whirl.env` file. (See [Configuring environment variables](#configuring-environment-variables).) The environment refers to a directory with the same name in the `envs` directory located near the _whirl_ script.

```bash
$ whirl [start] [-d <directory>] [-e <environment>]
$ whirl [-x <example>] [-e <environment>] [start]
```

Specifying the `start` command line argument is a more explicit way to start _whirl_.

#### Stopping whirl

```bash
$ whirl stop [-d <directory>] [-e <environment>]
$ whirl [-x <example>] [-e <environment>] stop
```
Stops the configured environment.

Expand Down Expand Up @@ -134,8 +155,6 @@ Each example contains it's own README file to explain the specifics of that exam

#### Generic running of examples

From within the example directory the `whirl` command can be executed.

To run an example:

```bash
Expand All @@ -144,6 +163,13 @@ $ cd ./examples/<example-dag-directory>
$ whirl -e <environment to use>
```

or
```bash
# Note: here we pass the whirl environment as a command-line argument. It can also be configured with the WHIRL_ENVIRONMENT variable
$ whirl -x <example to run> -e <environment to use>
```

Open your browser to [http://localhost:5000](http://localhost:5000) to access the Airflow UI. Manually enable the DAG and watch the pipeline run to successful completion.


Expand Down
2 changes: 2 additions & 0 deletions envs/dbt-example/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 2 additions & 0 deletions envs/postgres-s3-external-spark/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 2 additions & 0 deletions envs/postgres-s3-spark/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 2 additions & 0 deletions envs/s3-external-spark-hive/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 2 additions & 0 deletions envs/s3-spark-delta-sharing-minio/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 2 additions & 0 deletions envs/s3-spark-delta-sharing-riverbank/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 2 additions & 0 deletions envs/s3-spark-delta-sharing/.whirl.env
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
AIRFLOW__CORE__LOAD_EXAMPLES=False
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True

MINIMAL_AIRFLOW_VERSION=2.3.0
2 changes: 1 addition & 1 deletion envs/sftp-mysql-example/.whirl.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MOCK_DATA_FOLDER=$(pwd)/mock-data
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data


# Airflow variables
Expand Down
2 changes: 1 addition & 1 deletion examples/dbt-example/.whirl.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
WHIRL_ENVIRONMENT=dbt-example
MOCK_DATA_FOLDER=$(pwd)/mock-data
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
2 changes: 1 addition & 1 deletion examples/dbt-spark-example/.whirl.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
WHIRL_ENVIRONMENT=s3-external-spark-hive
MOCK_DATA_FOLDER=$(pwd)/mock-data
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
2 changes: 1 addition & 1 deletion examples/spark-delta-sharing/.whirl.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
WHIRL_ENVIRONMENT=s3-spark-delta-sharing
MOCK_DATA_FOLDER=$(pwd)/mock-data
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data

3 changes: 1 addition & 2 deletions examples/spark-s3-to-hive/.whirl.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
WHIRL_ENVIRONMENT=s3-external-spark-hive
MOCK_DATA_FOLDER=$(pwd)/mock-data

MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
3 changes: 2 additions & 1 deletion examples/spark-s3-to-postgres/.whirl.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
WHIRL_ENVIRONMENT=postgres-s3-external-spark
MOCK_DATA_FOLDER=$(pwd)/mock-data
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
MINIMAL_AIRFLOW_VERSION=2.3.0
28 changes: 21 additions & 7 deletions whirl
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ function export_environment_vars() {
# shellcheck disable=SC2034
DOCKER_CONTEXT_FOLDER=${SCRIPT_DIR}/docker
# shellcheck disable=SC2034
DAG_FOLDER=$(pwd)
# shellcheck disable=SC2034
PROJECTNAME=$(basename "${DAG_FOLDER}")
# shellcheck disable=SC2034
WHIRL_INITIATOR=$(whoami)
# shellcheck disable=SC2034
WHIRL_SETUP_FOLDER=/etc/airflow/whirl.setup.d
Expand All @@ -41,6 +37,17 @@ function export_environment_vars() {
. "${SCRIPT_DIR}/.whirl.env"
fi

# determine whether to use the example set at the commandline or
# in the current folder
if [ -z "${WHIRL_EXAMPLE_ARG}" ]; then
# shellcheck disable=SC2034
DAG_FOLDER=$(pwd)
else
DAG_FOLDER=${SCRIPT_DIR}/examples/${WHIRL_EXAMPLE_ARG}
fi
# shellcheck disable=SC2034
PROJECTNAME=$(basename "${DAG_FOLDER}")

# determine whether to use the environment set at the commandline or
# in the DAG FOLDER .whirl.env
if [ -z "${WHIRL_ENVIRONMENT_ARG}" ]; then
Expand Down Expand Up @@ -113,7 +120,7 @@ function export_environment_vars() {
}

# Succeeds (exit status 0) when DAG_FOLDER directly contains at least one
# regular file named *.py or *.zip, i.e. something that may hold an Airflow DAG.
# Falls back to the current directory when DAG_FOLDER is unset or empty.
detect_potential_dag() {
    # -maxdepth is a global option and must precede the tests (GNU find warns
    # otherwise); the name tests are grouped so -type f applies to both
    # patterns instead of only to *.py.
    test "$(find "${DAG_FOLDER:-.}" -maxdepth 1 -type f \( -name '*.py' -o -name '*.zip' \) | wc -l)" -gt 0
}

check_next_dagrun_scheduled_today() {
Expand Down Expand Up @@ -340,6 +347,7 @@ logs() {
usage() {
echo "usage: ${BASH_SOURCE[0]} [-h|--help] [-e|--environment env] [start|stop|ci]"
echo " -h|--help display usage"
echo " -x|--example example specify example to run"
echo " -e|--environment environment specify environment to use"
echo " -d|--directory environment_folder specify the folder that contains the environments (defaults to SCRIPT_DIR)"
echo " -l|--logs servicename tail the logs of the service"
Expand All @@ -354,6 +362,11 @@ function read_arguments() {
do
key="${1}"
case ${key} in
-x|--example)
WHIRL_EXAMPLE_ARG="${2}"
shift # past argument
shift # past value
;;
-e|--environment)
WHIRL_ENVIRONMENT_ARG="${2}"
shift # past argument
Expand Down Expand Up @@ -401,8 +414,8 @@ function read_arguments() {
function main() {
read_arguments "$@"

export_environment_vars
if detect_potential_dag; then
export_environment_vars

if [ -z "${LOGS}" ]; then
if [ -z "${STOP}" ]; then
Expand All @@ -414,7 +427,8 @@ function main() {
logs
fi
else
echo "No .py or .zip files found that may contain an Apache Airflow DAG"
echo "No .py or .zip files found in ${DAG_FOLDER} that may contain an Apache Airflow DAG"
echo "did you correctly specify the example directory?"
fi
}

Expand Down

0 comments on commit 4ff008b

Please sign in to comment.