diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
new file mode 100644
index 0000000..0c5af91
--- /dev/null
+++ b/.github/workflows/build-and-test.yml
@@ -0,0 +1,62 @@
+---
+name: Test and Build
+
+on:
+ pull_request:
+ branches:
+ - main
+
+permissions: {} # yamllint disable-line
+
+jobs:
+ yamllint:
+ name: YAML Lint
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+ - name: Run yamllint
+ id: run_yamllint
+ uses: actionshub/yamllint@b772a30c3ba90c5f5aadfe94d8f3599e3a7099c8 # v1.8.2
+
+ markdownlint:
+ name: Markdown Lint
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+ - name: Run mdl
+ id: run_mdl
+ uses: actionshub/markdownlint@6c82ff529253530dfbf75c37570876c52692835f # v3.1.4
+
+ build-and-test:
+    if: github.ref != 'refs/heads/main'  # github.ref is fully qualified; bare 'main' never matches
+ name: Build and Test
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ strategy:
+ fail-fast: false
+ max-parallel: 3
+ matrix:
+ flavour:
+ - "allspark-notebook"
+ - "datascience-notebook"
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+ - name: Build and Test
+ id: build_and_test
+ shell: bash
+ run: |
+ bash scripts/build-and-test.sh "${{ matrix.flavour }}"
diff --git a/.github/workflows/jupyter-lab-test-and-build.yml b/.github/workflows/jupyter-lab-test-and-build.yml
deleted file mode 100644
index ba12268..0000000
--- a/.github/workflows/jupyter-lab-test-and-build.yml
+++ /dev/null
@@ -1,108 +0,0 @@
----
-name: JupyterLab - test and build
-
-on:
- pull_request:
- push:
- branches: [main]
- tags: "*"
-
-jobs:
- yamllint:
- runs-on: ubuntu-latest
- steps:
- - name: Check out code
- uses: actions/checkout@main
- - name: Run yaml Lint
- uses: actionshub/yamllint@main
-
- mdl:
- runs-on: ubuntu-latest
- steps:
- - name: Check out code
- uses: actions/checkout@main
- - name: Run Markdown Lint
- uses: actionshub/markdownlint@main
-
- docker:
- runs-on: [self-hosted, management-ecr]
- strategy:
- fail-fast: false
- max-parallel: 3
- matrix:
- flavour:
- - "datascience-notebook"
- - "allspark-notebook"
- - "oracle-datascience-notebook"
- env:
- REPOSITORY: ${{ matrix.flavour }}
- ECR_REPOSITORY: ${{ matrix.flavour }}
- needs: [mdl, yamllint]
- steps:
- - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v1
- with:
- aws-region: eu-west-1
- role-to-assume: arn:aws:iam::593291632749:role/github-actions-management-ecr
- role-duration-seconds: 1200
- - name: Login to Amazon ECR
- id: login-ecr
- uses: aws-actions/amazon-ecr-login@v1
- with:
- registries: 593291632749
- - name: Check out code
- uses: actions/checkout@v2
- - name: Prep Tags
- id: prep
- run: |
- TAG=noop
- if [[ $GITHUB_REF == refs/tags/* ]]; then
- TAG=${GITHUB_REF#refs/tags/}
- elif [[ $GITHUB_REF == refs/heads/* ]]; then
- TAG=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g')
- if [ "${{ github.event.repository.default_branch }}" = "$TAG" ]; then
- TAG=edge
- fi
- elif [[ $GITHUB_REF == refs/pull/* ]]; then
- TAG=pr-${{ github.event.number }}
- elif [ "${{ github.event_name }}" = "push" ]; then
- TAG="sha-${GITHUB_SHA::8}"
- fi
-
- echo "Docker image tag = '$TAG'"
-
- echo ::set-output name=tag::${TAG}
- echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ')
-
- - name: Build image
- working-directory: "./${{ matrix.flavour }}"
- run: make build
- env:
- NETWORK: host
- REGISTRY: ${{ steps.login-ecr.outputs.registry }}
- IMAGE_TAG: ${{ steps.prep.outputs.tag }}
- - name: Install InSpec
- uses: actionshub/chef-install@main
- with:
- channel: current
- project: inspec
- - name: Test
- working-directory: "./${{ matrix.flavour }}"
- run: make test
- env:
- NETWORK: host
- REGISTRY: ${{ steps.login-ecr.outputs.registry }}
- IMAGE_TAG: ${{ steps.prep.outputs.tag }}
- - name: Push image
- working-directory: "./${{ matrix.flavour }}"
- run: make push
- env:
- REGISTRY: ${{ steps.login-ecr.outputs.registry }}
- IMAGE_TAG: ${{ steps.prep.outputs.tag }}
- - name: Cleanup
- if: ${{ always() }}
- working-directory: "./${{ matrix.flavour }}"
- run: make clean
- env:
- REGISTRY: ${{ steps.login-ecr.outputs.registry }}
- IMAGE_TAG: ${{ steps.prep.outputs.tag }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..64c77ba
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,46 @@
+---
+name: Publish
+
+on:
+ push:
+ tags:
+ - "v*"
+
+permissions: {} # yamllint disable-line
+
+jobs:
+ publish:
+ name: Publish
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ id-token: write
+ packages: write
+ strategy:
+ fail-fast: false
+ max-parallel: 3
+ matrix:
+ flavour:
+ - "allspark-notebook"
+ - "datascience-notebook"
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+ - name: Log in to GitHub Container Registry
+ id: login_ghcr
+ uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Build and Push
+ id: build_and_push
+ uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0
+ with:
+ context: ${{ matrix.flavour }}
+ file: ${{ matrix.flavour }}/Dockerfile
+ push: true
+ tags: ghcr.io/ministryofjustice/analytical-platform-${{ matrix.flavour }}:${{ github.ref_name }}
diff --git a/allspark-notebook/Dockerfile b/allspark-notebook/Dockerfile
index c90ffa9..2961154 100644
--- a/allspark-notebook/Dockerfile
+++ b/allspark-notebook/Dockerfile
@@ -1,57 +1,35 @@
-FROM jupyter/all-spark-notebook:spark-3.1.1@sha256:b73dad39ad5c469a92764e38d7cc4321040d3fedddcad7fcebc4ddc7f9c15ff2
+# lab-4.0.11
+FROM quay.io/jupyter/all-spark-notebook@sha256:a63b0faed54bc21d17a4691d8fae177dd95236e0adddbd9d43ee448dc2d5ba1e
-LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk
+LABEL org.opencontainers.image.vendor="Ministry of Justice" \
+ org.opencontainers.image.authors="Analytical Platform" \
+ org.opencontainers.image.title="Jupyter All Spark Notebook" \
+ maintainer="analytics-platform-tech@digital.justice.gov.uk"
-ENV PATH=$PATH:$HOME/.local/bin
+ENV PATH="${PATH}:${HOME}/.local/bin" \
+ CHOWN_HOME="no" \
+ PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell"
-# Home directory contents is already owned by UID 1000
-ENV CHOWN_HOME=no
-
-# NB these are sensible defaults but may need to be changed programatically for
-# non local spark (ie. EMR etc.)
-ENV PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell"
-
-# Container must be run as root to use NB_UID
USER root
-# Install OS pacakges
-#
-# The reason we have installed these has been lost. Including just in case.
-#
-# - gdal-bin
-# - libspatialindex-dev
-# - openssh-client
-#
-RUN apt-get update && \
- apt-get install -y \
- gdal-bin \
- libspatialindex-dev \
- openssh-client && \
- rm -rf /var/lib/apt/lists/*
-
-# I'm not sure this has any effect
+RUN apt-get update --yes \
+ && apt-get install --yes \
+ gdal-bin \
+ libspatialindex-dev \
+ openssh-client \
+ && apt-get clean --yes \
+ && rm -rf /var/lib/apt/lists/* \
+ && pip install --no-cache-dir --upgrade \
+ pip \
+ boto3 \
+ nbstripout \
+ s3fs==2023.12.2 \
+ dataengineeringutils3==1.4.3 \
+ etl-manager==7.6.0 \
+ && conda install --yes \
+ nbstripout \
+ && nbstripout --install --system \
+ && update-alternatives --set editor /bin/nano-tiny
+
+COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/add-user-to-group.sh
COPY files/hdfs-site.xml /usr/local/spark/conf/hdfs-site.xml
-
-# add-user-to-group.sh adds the $NB_USER to group 50 (staff) used by RStudio
-COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/
-
-# Install python packages
-# - pip - python package manager
-# - boto3 - python AWS library
-# - nbstripout - tool for stripping sensitive data out of notebooks
-#
-RUN pip install --upgrade \
- pip \
- boto3 \
- nbstripout \
- "s3fs<=0.4" \
- dataengineeringutils3==1.3.0 \
- etl-manager==7.4.0
-
-RUN conda install --yes \
- 'nbstripout'
-
-RUN nbstripout --install --system
-
-# Vi just doesn't cut it for some people
-RUN update-alternatives --set editor /bin/nano-tiny
diff --git a/allspark-notebook/Dockerfile.tests b/allspark-notebook/Dockerfile.tests
deleted file mode 100644
index 65c837f..0000000
--- a/allspark-notebook/Dockerfile.tests
+++ /dev/null
@@ -1,4 +0,0 @@
-FROM bash
-
-COPY tests /tests
-COPY files /files
diff --git a/allspark-notebook/Makefile b/allspark-notebook/Makefile
deleted file mode 100644
index 810f421..0000000
--- a/allspark-notebook/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-SHELL = '/bin/bash'
-export IMAGE_TAG ?= local
-export BUILD_TAG ?= latest
-export DOCKER_BUILDKIT=1
-export REPOSITORY=allspark-notebook
-export REGISTRY?=mojanalytics
-export NETWORK?=default
-export CHEF_LICENSE=accept-no-persist
-
-.PHONY: build test pull push inspec up clean
-
-pull:
- docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG}
-
-build:
- docker-compose build tests
- docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} .
-
-push:
- docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG}
-
-test: clean up
- echo Testing Container Version: ${IMAGE_TAG}
- docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1
-
-clean:
- docker-compose down --volumes --remove-orphans
- docker-compose --project-name ${REPOSITORY} down --remove-orphans
-
-up:
- docker-compose --project-name ${REPOSITORY} up -d tests test
diff --git a/allspark-notebook/docker-compose.yml b/allspark-notebook/docker-compose.yml
deleted file mode 100644
index 60e5b67..0000000
--- a/allspark-notebook/docker-compose.yml
+++ /dev/null
@@ -1,32 +0,0 @@
----
-version: "3.7"
-
-services:
- tests:
- build:
- context: .
- dockerfile: Dockerfile.tests
- network: ${NETWORK:-default}
- volumes:
- - tests:/tests
- test:
- image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest}
- network_mode: ${NETWORK:-default}
- command:
- - "/usr/local/bin/start-notebook.sh"
- - "--NotebookApp.token=''"
- ports: [8888:8888]
- environment: [JUPYTER_ENABLE_LAB=true]
- volumes:
- - tests:/share/tests
- inspec:
- image: chef/inspec:current
- network_mode: ${NETWORK:-default}
- environment:
- CHEF_LICENSE: accept-no-persist
- volumes:
- - /var/run/docker.sock:/var/run/docker.sock
- - tests:/share/tests:ro
-
-volumes:
- tests:
diff --git a/allspark-notebook/files/pyspark-s3.py b/allspark-notebook/files/pyspark-s3.py
deleted file mode 100644
index 74c7f9a..0000000
--- a/allspark-notebook/files/pyspark-s3.py
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env python
-
-import pyspark
-sc = pyspark.SparkContext("local[*]")
-
-hadoopConf = sc._jsc.hadoopConfiguration()
-hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
diff --git a/allspark-notebook/test/container-structure-test.yml b/allspark-notebook/test/container-structure-test.yml
new file mode 100644
index 0000000..63c37cc
--- /dev/null
+++ b/allspark-notebook/test/container-structure-test.yml
@@ -0,0 +1,41 @@
+---
+schemaVersion: 2.0.0
+
+containerRunOptions:
+ user: "jovyan"
+
+commandTests:
+ - name: "whoami"
+ command: "whoami"
+ expectedOutput: ["jovyan"]
+
+ - name: "user"
+ command: "id"
+ args: ["--user", "jovyan"]
+ expectedOutput: ["1000"]
+
+ - name: "groups"
+ command: "id"
+ args: ["--groups", "jovyan"]
+ expectedOutput: ["100"]
+
+ - name: "conda"
+ command: "conda"
+ args: ["info"]
+ expectedOutput: [".*active environment.*"]
+
+ - name: "nano"
+ command: "nano"
+ args: ["--version"]
+ expectedOutput: ["GNU nano.*"]
+
+ - name: "pip"
+ command: "pip"
+ args: ["--version"]
+ expectedOutput: ["pip.*"]
+
+ - name: "install osmnx"
+ command: "pip"
+ args: ["install", "osmnx"]
+ exitCode: 0
+ expectedOutput: ["Successfully installed.*"]
diff --git a/allspark-notebook/tests/controls/conda_spec.rb b/allspark-notebook/tests/controls/conda_spec.rb
deleted file mode 100644
index c4fa936..0000000
--- a/allspark-notebook/tests/controls/conda_spec.rb
+++ /dev/null
@@ -1,15 +0,0 @@
-title 'Working Conda'
-
-control 'Conda available' do
- impact 'high'
- title 'Conda installer should be available to use'
- desc 'The Conda installer is not preferred, but is the only way to install some packages.'
- tag 'installer'
- tag 'conda'
-
- describe command('conda info') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /conda/ }
- end
-end
-
diff --git a/allspark-notebook/tests/controls/nano_spec.rb b/allspark-notebook/tests/controls/nano_spec.rb
deleted file mode 100644
index 1a974f0..0000000
--- a/allspark-notebook/tests/controls/nano_spec.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-title 'nano is available and default editor'
-
-control 'nano available and default editor' do
- impact 'low'
- title 'nano is default editor'
- desc 'nano is a simple editor, it should be the default one'
- tag 'nano'
- tag 'editor'
-
- describe command('nano --version') do
- its('exit_status') { should eq 0 }
- end
-
- describe command('editor --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /GNU nano, version/ }
- end
-end
-
diff --git a/allspark-notebook/tests/controls/pip_spec.rb b/allspark-notebook/tests/controls/pip_spec.rb
deleted file mode 100644
index 81d46b4..0000000
--- a/allspark-notebook/tests/controls/pip_spec.rb
+++ /dev/null
@@ -1,29 +0,0 @@
-title 'Working Pip'
-
-control 'Pip available' do
- impact 'high'
- title 'Pip should be available to use'
- desc 'Pip is the preferred installer for python packages.'
- tag 'installer'
- tag 'pip'
-
- describe command('pip --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /pip 2/ }
- end
-end
-
-control 'osmnx' do
- impact 'high'
- title 'Install osmnx'
- desc 'Data scientists should be able top use osmnx' \
- ' OSMX is often installed, but difficult to install with system pacakges' \
- ' and rtree needing installing, which often break without lots of debugging.'
- tag 'installer'
- tag 'pip'
-
- describe command('pip install osmnx') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /Successfully installed/ }
- end
-end
diff --git a/allspark-notebook/tests/controls/user_spec.rb b/allspark-notebook/tests/controls/user_spec.rb
deleted file mode 100644
index feca701..0000000
--- a/allspark-notebook/tests/controls/user_spec.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-title 'Jovyan User'
-
-control 'Common Users' do
- impact 'high'
- title 'The jovyan user should exist'
- desc 'The joyvan user should exist and should have a UID of 1000'
- tag 'user'
- tag 'group'
-
- describe user('jovyan') do
- it { should exist }
- its('uid') { should eq 1000 }
- end
-end
-
-control 'Common Groups' do
- impact 'high'
- title 'The joyvan user should have the corect groups'
- desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image'
-
- describe user('jovyan') do
- its('gid') { should eq 100 }
- its('groups') { should eq ['users', 'staff']}
- end
-end
diff --git a/allspark-notebook/tests/files/pandas_read_homedir.py b/allspark-notebook/tests/files/pandas_read_homedir.py
deleted file mode 100644
index c2f04c5..0000000
--- a/allspark-notebook/tests/files/pandas_read_homedir.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import pandas as pd
-
-pd.read_csv("path/to/home_dir")
diff --git a/allspark-notebook/tests/files/pandas_read_s3.py b/allspark-notebook/tests/files/pandas_read_s3.py
deleted file mode 100644
index 405e54d..0000000
--- a/allspark-notebook/tests/files/pandas_read_s3.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import pandas as pd
-
-pd.read_csv("s3://blah")
diff --git a/allspark-notebook/tests/files/spark_read_s3.py b/allspark-notebook/tests/files/spark_read_s3.py
deleted file mode 100644
index f2171ee..0000000
--- a/allspark-notebook/tests/files/spark_read_s3.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from pyspark.context import SparkContext
-from pyspark.sql import SparkSession
-
-sc = SparkContext.getOrCreate()
-spark = SparkSession(sc)
-
-df = spark.read.csv("s3a://bucket/path/to/file.csv")
-df.limit(10).show()
diff --git a/allspark-notebook/tests/inspec.lock b/allspark-notebook/tests/inspec.lock
deleted file mode 100644
index e687b9b..0000000
--- a/allspark-notebook/tests/inspec.lock
+++ /dev/null
@@ -1,3 +0,0 @@
----
-lockfile_version: 1
-depends: []
diff --git a/allspark-notebook/tests/inspec.yml b/allspark-notebook/tests/inspec.yml
deleted file mode 100644
index b4fcaa6..0000000
--- a/allspark-notebook/tests/inspec.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-name: Allspark Notebook
-title: Allspark Notebook
-maintainer: Analytical Platform
-copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice)
-copyright_email: "analytics-platform-tech@digital.justice.gov.uk"
-license: MIT
-summary: An InSpec Compliance Profile for Analytical Platform Allspark Notebook
-version: 0.1.0
diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile
index e962ee7..a262ee2 100644
--- a/datascience-notebook/Dockerfile
+++ b/datascience-notebook/Dockerfile
@@ -1,36 +1,34 @@
-FROM jupyter/datascience-notebook:lab-3.1.11
+# lab-4.0.11
+FROM quay.io/jupyter/datascience-notebook@sha256:76148e403aa44017f59b1dd0861d91daae800c7f86e9f39138b9d2703b885082
-LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk
+LABEL org.opencontainers.image.vendor="Ministry of Justice" \
+ org.opencontainers.image.authors="Analytical Platform" \
+ org.opencontainers.image.title="Jupyter Datascience Notebook" \
+ maintainer="analytics-platform-tech@digital.justice.gov.uk"
-ENV PATH=$PATH:$HOME/.local/bin
-
-# Home directory contents is already owned by UID 1000
-ENV CHOWN_HOME=no
+ENV PATH="${PATH}:${HOME}/.local/bin" \
+ CHOWN_HOME="no"
USER root
-RUN apt-get update \
- && apt-get install -y \
- ca-certificates-java \
- openjdk-8-jdk \
- openssh-client \
- software-properties-common \
- gdal-bin \
- libspatialindex-dev \
- && rm -rf /var/lib/apt/lists/*
-
-RUN update-alternatives --set editor /bin/nano-tiny
-
-RUN pip install --upgrade \
- boto3 \
- black \
- nbstripout \
- rtree \
- "s3fs<=0.4"
-
-RUN pip install --upgrade jupyterlab-git
-
-RUN conda install --yes \
- 'nbstripout'
-
-RUN nbstripout --install --system
+RUN apt-get update --yes \
+ && apt-get install --yes \
+ ca-certificates-java \
+ openjdk-8-jdk \
+ openssh-client \
+ software-properties-common \
+ gdal-bin \
+ libspatialindex-dev \
+ && apt-get clean --yes \
+ && rm -rf /var/lib/apt/lists/* \
+ && pip install --no-cache-dir --upgrade \
+ boto3 \
+ black \
+ nbstripout \
+ rtree \
+ s3fs==2023.12.2 \
+ jupyterlab-git \
+ && conda install --yes \
+ nbstripout \
+ && nbstripout --install --system \
+ && update-alternatives --set editor /bin/nano-tiny
diff --git a/datascience-notebook/Dockerfile.tests b/datascience-notebook/Dockerfile.tests
deleted file mode 100644
index 65c837f..0000000
--- a/datascience-notebook/Dockerfile.tests
+++ /dev/null
@@ -1,4 +0,0 @@
-FROM bash
-
-COPY tests /tests
-COPY files /files
diff --git a/datascience-notebook/Makefile b/datascience-notebook/Makefile
deleted file mode 100644
index a8b8019..0000000
--- a/datascience-notebook/Makefile
+++ /dev/null
@@ -1,34 +0,0 @@
-SHELL = '/bin/bash'
-export IMAGE_TAG ?= local
-export BUILD_TAG ?= latest
-export DOCKER_BUILDKIT=1
-export REPOSITORY=datascience-notebook
-export REGISTRY?=mojanalytics
-export NETWORK?=default
-export CHEF_LICENSE=accept-no-persist
-
-.PHONY: build test pull push inspec up clean
-
-pull:
- docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG}
-
-build:
- docker-compose build tests
- docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} .
-
-push:
- docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG}
-
-test: clean up
- echo Testing Container Version: ${IMAGE_TAG}
- docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1
-
-clean:
- docker-compose down --volumes --remove-orphans
- docker-compose --project-name ${REPOSITORY} down --volumes
-
-up:
- docker-compose --project-name ${REPOSITORY} up -d tests test
-
-enter:
- docker-compose --project-name ${REPOSITORY} run test bash
diff --git a/datascience-notebook/docker-compose.yml b/datascience-notebook/docker-compose.yml
deleted file mode 100644
index 60e5b67..0000000
--- a/datascience-notebook/docker-compose.yml
+++ /dev/null
@@ -1,32 +0,0 @@
----
-version: "3.7"
-
-services:
- tests:
- build:
- context: .
- dockerfile: Dockerfile.tests
- network: ${NETWORK:-default}
- volumes:
- - tests:/tests
- test:
- image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest}
- network_mode: ${NETWORK:-default}
- command:
- - "/usr/local/bin/start-notebook.sh"
- - "--NotebookApp.token=''"
- ports: [8888:8888]
- environment: [JUPYTER_ENABLE_LAB=true]
- volumes:
- - tests:/share/tests
- inspec:
- image: chef/inspec:current
- network_mode: ${NETWORK:-default}
- environment:
- CHEF_LICENSE: accept-no-persist
- volumes:
- - /var/run/docker.sock:/var/run/docker.sock
- - tests:/share/tests:ro
-
-volumes:
- tests:
diff --git a/datascience-notebook/test/container-structure-test.yml b/datascience-notebook/test/container-structure-test.yml
new file mode 100644
index 0000000..deb39e2
--- /dev/null
+++ b/datascience-notebook/test/container-structure-test.yml
@@ -0,0 +1,46 @@
+---
+schemaVersion: 2.0.0
+
+containerRunOptions:
+ user: "jovyan"
+
+commandTests:
+ - name: "whoami"
+ command: "whoami"
+ expectedOutput: ["jovyan"]
+
+ - name: "user"
+ command: "id"
+ args: ["--user", "jovyan"]
+ expectedOutput: ["1000"]
+
+ - name: "groups"
+ command: "id"
+ args: ["--groups", "jovyan"]
+ expectedOutput: ["100"]
+
+ - name: "conda"
+ command: "conda"
+ args: ["info"]
+ expectedOutput: [".*active environment.*"]
+
+  - name: "jupyter-lab"
+ command: "jupyter-lab"
+ args: ["--version"]
+ expectedOutput: ["4.0.11"]
+
+ - name: "nano"
+ command: "nano"
+ args: ["--version"]
+ expectedOutput: ["GNU nano.*"]
+
+ - name: "pip"
+ command: "pip"
+ args: ["--version"]
+ expectedOutput: ["pip.*"]
+
+ - name: "install osmnx"
+ command: "pip"
+ args: ["install", "osmnx"]
+ exitCode: 0
+ expectedOutput: ["Successfully installed.*"]
diff --git a/datascience-notebook/tests/controls/conda_spec.rb b/datascience-notebook/tests/controls/conda_spec.rb
deleted file mode 100644
index c646439..0000000
--- a/datascience-notebook/tests/controls/conda_spec.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-title 'Working Conda'
-
-control 'Conda available' do
- impact 'high'
- title 'Conda installer should be available to use'
- desc 'The Conda installer is not preferred, but is the only way to install some packages.'
- tag 'installer'
- tag 'conda'
-
- describe command('conda info') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /conda/ }
- end
-end
diff --git a/datascience-notebook/tests/controls/jupyter_lab_spec.rb b/datascience-notebook/tests/controls/jupyter_lab_spec.rb
deleted file mode 100644
index 355a755..0000000
--- a/datascience-notebook/tests/controls/jupyter_lab_spec.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-title 'Jupyter Lab'
-
-control 'JupyerLab version' do
- impact 'high'
- title 'JupyterLab verioon'
- desc 'JupyerLab should be the correct verion'
- tag 'JupyterLab'
-
- describe command('jupyter-lab --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /3.1.11/ }
- end
-end
diff --git a/datascience-notebook/tests/controls/nano_spec.rb b/datascience-notebook/tests/controls/nano_spec.rb
deleted file mode 100644
index 1a974f0..0000000
--- a/datascience-notebook/tests/controls/nano_spec.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-title 'nano is available and default editor'
-
-control 'nano available and default editor' do
- impact 'low'
- title 'nano is default editor'
- desc 'nano is a simple editor, it should be the default one'
- tag 'nano'
- tag 'editor'
-
- describe command('nano --version') do
- its('exit_status') { should eq 0 }
- end
-
- describe command('editor --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /GNU nano, version/ }
- end
-end
-
diff --git a/datascience-notebook/tests/controls/pip_spec.rb b/datascience-notebook/tests/controls/pip_spec.rb
deleted file mode 100644
index 6878cea..0000000
--- a/datascience-notebook/tests/controls/pip_spec.rb
+++ /dev/null
@@ -1,29 +0,0 @@
-title 'Working Pip'
-
-control 'Pip available' do
- impact 'high'
- title 'Pip should be available to use'
- desc 'Pip is the preferred installer for python packages.'
- tag 'installer'
- tag 'pip'
-
- describe command('pip --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /pip 21/ }
- end
-end
-
-control 'osmnx' do
- impact 'high'
- title 'Install osmnx'
- desc 'Data scientists should be able top use osmnx' \
- ' OSMX is often installed, but difficult to install with system pacakges' \
- ' and rtree needing installing, which often break without lots of debugging.'
- tag 'installer'
- tag 'pip'
-
- describe command('pip install osmnx') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /Successfully installed/ }
- end
-end
diff --git a/datascience-notebook/tests/controls/user_spec.rb b/datascience-notebook/tests/controls/user_spec.rb
deleted file mode 100644
index ec293b6..0000000
--- a/datascience-notebook/tests/controls/user_spec.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-title 'Jovyan User'
-
-control 'Common Users' do
- impact 'high'
- title 'The jovyan user should exist'
- desc 'The joyvan user should exist This makes sure that it has the same UID & GID as the rstudio images'
- tag 'user'
- tag 'group'
-
- describe user('jovyan') do
- it { should exist }
- its('uid') { should eq 1000 }
- end
-end
-
-#control 'Common Groups' do
-# impact 'high'
-# title 'The joyvan user should have the corect groups'
-# desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image'
-#
-# describe user('jovyan') do
-# its('gid') { should eq 100 }
-# its('groups') { should eq ['users', 'staff']}
-# end
-#end
diff --git a/datascience-notebook/tests/files/pandas_read_homedir.py b/datascience-notebook/tests/files/pandas_read_homedir.py
deleted file mode 100644
index c2f04c5..0000000
--- a/datascience-notebook/tests/files/pandas_read_homedir.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import pandas as pd
-
-pd.read_csv("path/to/home_dir")
diff --git a/datascience-notebook/tests/files/pandas_read_s3.py b/datascience-notebook/tests/files/pandas_read_s3.py
deleted file mode 100644
index 405e54d..0000000
--- a/datascience-notebook/tests/files/pandas_read_s3.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import pandas as pd
-
-pd.read_csv("s3://blah")
diff --git a/datascience-notebook/tests/files/spark_read_s3.py b/datascience-notebook/tests/files/spark_read_s3.py
deleted file mode 100644
index 25e026a..0000000
--- a/datascience-notebook/tests/files/spark_read_s3.py
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-spark.read_parquet()
diff --git a/datascience-notebook/tests/inspec.lock b/datascience-notebook/tests/inspec.lock
deleted file mode 100644
index e687b9b..0000000
--- a/datascience-notebook/tests/inspec.lock
+++ /dev/null
@@ -1,3 +0,0 @@
----
-lockfile_version: 1
-depends: []
diff --git a/datascience-notebook/tests/inspec.yml b/datascience-notebook/tests/inspec.yml
deleted file mode 100644
index 0256e1c..0000000
--- a/datascience-notebook/tests/inspec.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-name: Datascience Notebook
-title: Datascience Notebook
-maintainer: Analytical Platform
-copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice)
-copyright_email: "analytics-platform-tech@digital.justice.gov.uk"
-license: MIT
-summary: An InSpec Compliance Profile for Analytical Platform Jupyter Datascience Notebook
-version: 0.1.0
diff --git a/files/hdfs-site.xml b/files/hdfs-site.xml
deleted file mode 100644
index 617f159..0000000
--- a/files/hdfs-site.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
- fs.s3a.impl
- org.apache.hadoop.fs.s3a.S3AFileSystem
-
-
diff --git a/files/pyspark-s3.py b/files/pyspark-s3.py
deleted file mode 100644
index da6f313..0000000
--- a/files/pyspark-s3.py
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python
-
-import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1 pyspark-shell'
-
-import pyspark
-sc = pyspark.SparkContext("local[*]")
-
-from pyspark.sql import SQLContext
-sqlContext = SQLContext(sc)
-
-hadoopConf = sc._jsc.hadoopConfiguration()
-hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
diff --git a/oracle-datascience-notebook/Dockerfile b/oracle-datascience-notebook/Dockerfile
deleted file mode 100644
index eb79c8a..0000000
--- a/oracle-datascience-notebook/Dockerfile
+++ /dev/null
@@ -1,60 +0,0 @@
-FROM jupyter/datascience-notebook:76402a27fd13
-
-LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk
-
-USER root
-
-ENV PATH=$PATH:$HOME/.local/bin \
- CHOWN_HOME=no \
- ORACLE_HOME=/opt/oracle/instantclient_19_6 \
- LD_LIBRARY_PATH=$ORACLE_HOME:$LD_LIBRARY_PATH \
- PATH=$PATH:$ORACLE_HOME \
- INSTANT_CLIENT_VERSION="19.6.0.0"
-
-RUN apt-get update \
- && apt-get install -y \
- ca-certificates-java \
- openjdk-8-jdk \
- openssh-client \
- software-properties-common \
- gdal-bin \
- libspatialindex-dev \
- libaio1 \
- && rm -rf /var/lib/apt/lists/*
-
-RUN update-alternatives --set editor /bin/nano \
- && usermod -a -G "staff,users" "${NB_USER}"
-
-# Install Oracle Instant Client and SQL*Plus
-# See: https://www.oracle.com/uk/database/technologies/instant-client/linux-x86-64-downloads.html#ic_x64_inst
-RUN mkdir /opt/oracle \
- && cd /opt/oracle \
- && curl -sO https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \
- && curl -sO https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \
- && unzip instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \
- && unzip instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \
- && rm instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip
-
-RUN sh -c "echo /opt/oracle/instantclient_19_6 > /etc/ld.so.conf.d/oracle-instantclient.conf" \
- && ldconfig
-
-USER $NB_USER
-RUN pip install --upgrade \
- boto3 \
- black \
- nbstripout \
- rtree \
- s3fs
-
-ENV JUPYTERLAB_DIR="/home/jovyan/.jupyter"
-RUN jupyter labextension install \
- @jupyter-widgets/jupyterlab-manager \
- @jupyterlab/github \
- @jupyterlab/git
-
-RUN pip install flake8 \
- && jupyter labextension install jupyterlab-flake8
-
-RUN pip install jupyterlab_code_formatter==1.3.8 \
- && jupyter labextension install @ryantam626/jupyterlab_code_formatter@v1.3.8 \
- && jupyter serverextension enable --user --py jupyterlab_code_formatter
diff --git a/oracle-datascience-notebook/Dockerfile.tests b/oracle-datascience-notebook/Dockerfile.tests
deleted file mode 100644
index 65c837f..0000000
--- a/oracle-datascience-notebook/Dockerfile.tests
+++ /dev/null
@@ -1,4 +0,0 @@
-FROM bash
-
-COPY tests /tests
-COPY files /files
diff --git a/oracle-datascience-notebook/Makefile b/oracle-datascience-notebook/Makefile
deleted file mode 100644
index 73589b5..0000000
--- a/oracle-datascience-notebook/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-SHELL = '/bin/bash'
-export IMAGE_TAG ?= local
-export BUILD_TAG ?= latest
-export DOCKER_BUILDKIT=1
-export REPOSITORY=oracle-datascience-notebook
-export REGISTRY?=mojanalytics
-export NETWORK?=default
-export CHEF_LICENSE=accept-no-persist
-
-.PHONY: build test pull push inspec up clean
-
-pull:
- docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG}
-
-build:
- docker-compose build tests
- docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} .
-
-push:
- docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG}
-
-test: clean up
- echo Testing Container Version: ${IMAGE_TAG}
- docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1
-
-clean:
- docker-compose down --volumes --remove-orphans
- docker-compose --project-name ${REPOSITORY} down --volumes
-
-up:
- docker-compose --project-name ${REPOSITORY} up -d tests test
diff --git a/oracle-datascience-notebook/docker-compose.yml b/oracle-datascience-notebook/docker-compose.yml
deleted file mode 100644
index 60e5b67..0000000
--- a/oracle-datascience-notebook/docker-compose.yml
+++ /dev/null
@@ -1,32 +0,0 @@
----
-version: "3.7"
-
-services:
- tests:
- build:
- context: .
- dockerfile: Dockerfile.tests
- network: ${NETWORK:-default}
- volumes:
- - tests:/tests
- test:
- image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest}
- network_mode: ${NETWORK:-default}
- command:
- - "/usr/local/bin/start-notebook.sh"
- - "--NotebookApp.token=''"
- ports: [8888:8888]
- environment: [JUPYTER_ENABLE_LAB=true]
- volumes:
- - tests:/share/tests
- inspec:
- image: chef/inspec:current
- network_mode: ${NETWORK:-default}
- environment:
- CHEF_LICENSE: accept-no-persist
- volumes:
- - /var/run/docker.sock:/var/run/docker.sock
- - tests:/share/tests:ro
-
-volumes:
- tests:
diff --git a/oracle-datascience-notebook/files/hdfs-site.xml b/oracle-datascience-notebook/files/hdfs-site.xml
deleted file mode 100644
index 617f159..0000000
--- a/oracle-datascience-notebook/files/hdfs-site.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
- fs.s3a.impl
- org.apache.hadoop.fs.s3a.S3AFileSystem
-
-
diff --git a/oracle-datascience-notebook/files/pyspark-s3.py b/oracle-datascience-notebook/files/pyspark-s3.py
deleted file mode 100644
index da6f313..0000000
--- a/oracle-datascience-notebook/files/pyspark-s3.py
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python
-
-import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1 pyspark-shell'
-
-import pyspark
-sc = pyspark.SparkContext("local[*]")
-
-from pyspark.sql import SQLContext
-sqlContext = SQLContext(sc)
-
-hadoopConf = sc._jsc.hadoopConfiguration()
-hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
diff --git a/oracle-datascience-notebook/tests/controls/conda_spec.rb b/oracle-datascience-notebook/tests/controls/conda_spec.rb
deleted file mode 100644
index c4fa936..0000000
--- a/oracle-datascience-notebook/tests/controls/conda_spec.rb
+++ /dev/null
@@ -1,15 +0,0 @@
-title 'Working Conda'
-
-control 'Conda available' do
- impact 'high'
- title 'Conda installer should be available to use'
- desc 'The Conda installer is not preferred, but is the only way to install some packages.'
- tag 'installer'
- tag 'conda'
-
- describe command('conda info') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /conda/ }
- end
-end
-
diff --git a/oracle-datascience-notebook/tests/controls/nano_spec.rb b/oracle-datascience-notebook/tests/controls/nano_spec.rb
deleted file mode 100644
index 1a974f0..0000000
--- a/oracle-datascience-notebook/tests/controls/nano_spec.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-title 'nano is available and default editor'
-
-control 'nano available and default editor' do
- impact 'low'
- title 'nano is default editor'
- desc 'nano is a simple editor, it should be the default one'
- tag 'nano'
- tag 'editor'
-
- describe command('nano --version') do
- its('exit_status') { should eq 0 }
- end
-
- describe command('editor --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /GNU nano, version/ }
- end
-end
-
diff --git a/oracle-datascience-notebook/tests/controls/pip_spec.rb b/oracle-datascience-notebook/tests/controls/pip_spec.rb
deleted file mode 100644
index eb31df1..0000000
--- a/oracle-datascience-notebook/tests/controls/pip_spec.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-title 'Working Pip'
-
-control 'Pip available' do
- impact 'high'
- title 'Pip should be available to use'
- desc 'Pip is the preferred installer for python packages.'
- tag 'installer'
- tag 'pip'
-
- describe command('pip --version') do
- its('exit_status') { should eq 0 }
- its('stdout') { should match /pip 20/ }
- end
-end
diff --git a/oracle-datascience-notebook/tests/controls/user_spec.rb b/oracle-datascience-notebook/tests/controls/user_spec.rb
deleted file mode 100644
index f3a8555..0000000
--- a/oracle-datascience-notebook/tests/controls/user_spec.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-title 'Jovyan User'
-
-control 'Common Users' do
- impact 'high'
- title 'The jovyan user should exist'
- desc 'The joyvan user should exist This makes sure that it has the same UID & GID as the rstudio images'
- tag 'user'
- tag 'group'
-
- describe user('jovyan') do
- it { should exist }
- its('uid') { should eq 1000 }
- end
-end
-
-control 'Common Groups' do
- impact 'high'
- title 'The joyvan user should have the corect groups'
- desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image'
-
- describe user('jovyan') do
- its('gid') { should eq 100 }
- its('groups') { should eq ['users', 'staff']}
- end
-end
diff --git a/oracle-datascience-notebook/tests/files/pandas_read_homedir.py b/oracle-datascience-notebook/tests/files/pandas_read_homedir.py
deleted file mode 100644
index c2f04c5..0000000
--- a/oracle-datascience-notebook/tests/files/pandas_read_homedir.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import pandas as pd
-
-pd.read_csv("path/to/home_dir")
diff --git a/oracle-datascience-notebook/tests/files/pandas_read_s3.py b/oracle-datascience-notebook/tests/files/pandas_read_s3.py
deleted file mode 100644
index 405e54d..0000000
--- a/oracle-datascience-notebook/tests/files/pandas_read_s3.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import pandas as pd
-
-pd.read_csv("s3://blah")
diff --git a/oracle-datascience-notebook/tests/files/spark_read_s3.py b/oracle-datascience-notebook/tests/files/spark_read_s3.py
deleted file mode 100644
index 25e026a..0000000
--- a/oracle-datascience-notebook/tests/files/spark_read_s3.py
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-spark.read_parquet()
diff --git a/oracle-datascience-notebook/tests/inspec.lock b/oracle-datascience-notebook/tests/inspec.lock
deleted file mode 100644
index e687b9b..0000000
--- a/oracle-datascience-notebook/tests/inspec.lock
+++ /dev/null
@@ -1,3 +0,0 @@
----
-lockfile_version: 1
-depends: []
diff --git a/oracle-datascience-notebook/tests/inspec.yml b/oracle-datascience-notebook/tests/inspec.yml
deleted file mode 100644
index 1bd8f79..0000000
--- a/oracle-datascience-notebook/tests/inspec.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-name: Oracle Datascience Notebook
-title: Oracle Datascience Notebook
-maintainer: Analytical Platform
-copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice)
-copyright_email: "analytics-platform-tech@digital.justice.gov.uk"
-license: MIT
-summary: An InSpec Compliance Profile for Analytical Platform Jupyter Datascience Notebook
-version: 0.1.0
diff --git a/scripts/build-and-test.sh b/scripts/build-and-test.sh
new file mode 100644
index 0000000..f0f9db7
--- /dev/null
+++ b/scripts/build-and-test.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+IMAGE="${1}"
+IMAGE_TAG="analytical-platform.service.justice.gov.uk/${IMAGE}:local"
+CONTAINER_STRUCTURE_TEST_IMAGE="gcr.io/gcp-runtimes/container-structure-test:latest"
+
+if [[ "${REMOTE_CONTAINERS}" ]] && [[ "$(uname -m)" == "aarch64" ]]; then
+ echo "(⚠) Looks like you're running in a dev container on Apple Silicon."
+ echo "(⚠) This script builds linux/amd64 images which might take a long time or even fail."
+ export PLATFORM_FLAG="--platform linux/amd64"
+fi
+
+echo "Building [ ${IMAGE} ] as [ ${IMAGE_TAG} ]"
+
+docker build ${PLATFORM_FLAG} --file "${IMAGE}/Dockerfile" --tag "${IMAGE_TAG}" "${IMAGE}"
+
+if [[ -f "${IMAGE}/test/container-structure-test.yml" ]]; then
+ echo "Running container structure test for [ ${IMAGE_TAG} ]"
+
+ docker run --rm ${PLATFORM_FLAG} \
+ --volume /var/run/docker.sock:/var/run/docker.sock \
+ --volume "${PWD}:/workspace" \
+ --workdir /workspace \
+ "${CONTAINER_STRUCTURE_TEST_IMAGE}" \
+ test --image "${IMAGE_TAG}" --config "/workspace/${IMAGE}/test/container-structure-test.yml"
+fi
diff --git a/datascience-notebook/files/hdfs-site.xml b/src/files/hdfs-site.xml
similarity index 100%
rename from datascience-notebook/files/hdfs-site.xml
rename to src/files/hdfs-site.xml
diff --git a/datascience-notebook/files/pyspark-s3.py b/src/files/pyspark-s3.py
similarity index 100%
rename from datascience-notebook/files/pyspark-s3.py
rename to src/files/pyspark-s3.py