From 26a21c062ee79e2921fd41e5beb22710a57ccfef Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 29 Jan 2024 17:22:18 +0000 Subject: [PATCH] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Upgrade=20Jupyter=20image?= =?UTF-8?q?=20(#76)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jacob Woffenden --- .github/workflows/build-and-test.yml | 62 ++++++++++ .../workflows/jupyter-lab-test-and-build.yml | 108 ------------------ .github/workflows/publish.yml | 46 ++++++++ allspark-notebook/Dockerfile | 80 +++++-------- allspark-notebook/Dockerfile.tests | 4 - allspark-notebook/Makefile | 31 ----- allspark-notebook/docker-compose.yml | 32 ------ allspark-notebook/files/pyspark-s3.py | 7 -- .../test/container-structure-test.yml | 41 +++++++ .../tests/controls/conda_spec.rb | 15 --- allspark-notebook/tests/controls/nano_spec.rb | 19 --- allspark-notebook/tests/controls/pip_spec.rb | 29 ----- allspark-notebook/tests/controls/user_spec.rb | 25 ---- .../tests/files/pandas_read_homedir.py | 3 - .../tests/files/pandas_read_s3.py | 3 - .../tests/files/spark_read_s3.py | 8 -- allspark-notebook/tests/inspec.lock | 3 - allspark-notebook/tests/inspec.yml | 9 -- datascience-notebook/Dockerfile | 60 +++++----- datascience-notebook/Dockerfile.tests | 4 - datascience-notebook/Makefile | 34 ------ datascience-notebook/docker-compose.yml | 32 ------ .../test/container-structure-test.yml | 46 ++++++++ .../tests/controls/conda_spec.rb | 14 --- .../tests/controls/jupyter_lab_spec.rb | 13 --- .../tests/controls/nano_spec.rb | 19 --- .../tests/controls/pip_spec.rb | 29 ----- .../tests/controls/user_spec.rb | 25 ---- .../tests/files/pandas_read_homedir.py | 3 - .../tests/files/pandas_read_s3.py | 3 - .../tests/files/spark_read_s3.py | 3 - datascience-notebook/tests/inspec.lock | 3 - datascience-notebook/tests/inspec.yml | 9 -- files/hdfs-site.xml | 6 - files/pyspark-s3.py | 13 --- oracle-datascience-notebook/Dockerfile | 60 ---------- oracle-datascience-notebook/Dockerfile.tests | 4 - oracle-datascience-notebook/Makefile | 31 ----- .../docker-compose.yml | 32 ------ .../files/hdfs-site.xml | 6 - .../files/pyspark-s3.py | 13 --- .../tests/controls/conda_spec.rb | 15 --- .../tests/controls/nano_spec.rb | 19 --- .../tests/controls/pip_spec.rb | 14 --- .../tests/controls/user_spec.rb | 25 ---- .../tests/files/pandas_read_homedir.py | 3 - .../tests/files/pandas_read_s3.py | 3 - .../tests/files/spark_read_s3.py | 3 - oracle-datascience-notebook/tests/inspec.lock | 3 - oracle-datascience-notebook/tests/inspec.yml | 9 -- scripts/build-and-test.sh | 26 +++++ .../files/hdfs-site.xml | 0 .../files/pyspark-s3.py | 0 53 files changed, 279 insertions(+), 828 deletions(-) create mode 100644 .github/workflows/build-and-test.yml delete mode 100644 .github/workflows/jupyter-lab-test-and-build.yml create mode 100644 .github/workflows/publish.yml delete mode 100644 allspark-notebook/Dockerfile.tests delete mode 100644 allspark-notebook/Makefile delete mode 100644 allspark-notebook/docker-compose.yml delete mode 100644 allspark-notebook/files/pyspark-s3.py create mode 100644 allspark-notebook/test/container-structure-test.yml delete mode 100644 allspark-notebook/tests/controls/conda_spec.rb delete mode 100644 allspark-notebook/tests/controls/nano_spec.rb delete mode 100644 allspark-notebook/tests/controls/pip_spec.rb delete mode 100644 allspark-notebook/tests/controls/user_spec.rb delete mode 100644 allspark-notebook/tests/files/pandas_read_homedir.py delete mode 100644 
allspark-notebook/tests/files/pandas_read_s3.py delete mode 100644 allspark-notebook/tests/files/spark_read_s3.py delete mode 100644 allspark-notebook/tests/inspec.lock delete mode 100644 allspark-notebook/tests/inspec.yml delete mode 100644 datascience-notebook/Dockerfile.tests delete mode 100644 datascience-notebook/Makefile delete mode 100644 datascience-notebook/docker-compose.yml create mode 100644 datascience-notebook/test/container-structure-test.yml delete mode 100644 datascience-notebook/tests/controls/conda_spec.rb delete mode 100644 datascience-notebook/tests/controls/jupyter_lab_spec.rb delete mode 100644 datascience-notebook/tests/controls/nano_spec.rb delete mode 100644 datascience-notebook/tests/controls/pip_spec.rb delete mode 100644 datascience-notebook/tests/controls/user_spec.rb delete mode 100644 datascience-notebook/tests/files/pandas_read_homedir.py delete mode 100644 datascience-notebook/tests/files/pandas_read_s3.py delete mode 100644 datascience-notebook/tests/files/spark_read_s3.py delete mode 100644 datascience-notebook/tests/inspec.lock delete mode 100644 datascience-notebook/tests/inspec.yml delete mode 100644 files/hdfs-site.xml delete mode 100644 files/pyspark-s3.py delete mode 100644 oracle-datascience-notebook/Dockerfile delete mode 100644 oracle-datascience-notebook/Dockerfile.tests delete mode 100644 oracle-datascience-notebook/Makefile delete mode 100644 oracle-datascience-notebook/docker-compose.yml delete mode 100644 oracle-datascience-notebook/files/hdfs-site.xml delete mode 100644 oracle-datascience-notebook/files/pyspark-s3.py delete mode 100644 oracle-datascience-notebook/tests/controls/conda_spec.rb delete mode 100644 oracle-datascience-notebook/tests/controls/nano_spec.rb delete mode 100644 oracle-datascience-notebook/tests/controls/pip_spec.rb delete mode 100644 oracle-datascience-notebook/tests/controls/user_spec.rb delete mode 100644 oracle-datascience-notebook/tests/files/pandas_read_homedir.py delete mode 100644 oracle-datascience-notebook/tests/files/pandas_read_s3.py delete mode 100644 oracle-datascience-notebook/tests/files/spark_read_s3.py delete mode 100644 oracle-datascience-notebook/tests/inspec.lock delete mode 100644 oracle-datascience-notebook/tests/inspec.yml create mode 100644 scripts/build-and-test.sh rename {datascience-notebook => src}/files/hdfs-site.xml (100%) rename {datascience-notebook => src}/files/pyspark-s3.py (100%) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml new file mode 100644 index 0000000..0c5af91 --- /dev/null +++ b/.github/workflows/build-and-test.yml @@ -0,0 +1,62 @@ +--- +name: Test and Build + +on: + pull_request: + branches: + - main + +permissions: {} # yamllint disable-line + +jobs: + yamllint: + name: YAML Lint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Run yamllint + id: run_yamllint + uses: actionshub/yamllint@b772a30c3ba90c5f5aadfe94d8f3599e3a7099c8 # v1.8.2 + + markdownlint: + name: Markdown Lint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Run mdl + id: run_mdl + uses: actionshub/markdownlint@6c82ff529253530dfbf75c37570876c52692835f # v3.1.4 + + build-and-test: + if: github.ref != 'main' + name: Build and Test + runs-on: ubuntu-latest + permissions: + contents: read + 
strategy: + fail-fast: false + max-parallel: 3 + matrix: + flavour: + - "allspark-notebook" + - "datascience-notebook" + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Build and Test + id: build_and_test + shell: bash + run: | + bash scripts/build-and-test.sh "${{ matrix.flavour }}" diff --git a/.github/workflows/jupyter-lab-test-and-build.yml b/.github/workflows/jupyter-lab-test-and-build.yml deleted file mode 100644 index ba12268..0000000 --- a/.github/workflows/jupyter-lab-test-and-build.yml +++ /dev/null @@ -1,108 +0,0 @@ ---- -name: JupyterLab - test and build - -on: - pull_request: - push: - branches: [main] - tags: "*" - -jobs: - yamllint: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@main - - name: Run yaml Lint - uses: actionshub/yamllint@main - - mdl: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@main - - name: Run Markdown Lint - uses: actionshub/markdownlint@main - - docker: - runs-on: [self-hosted, management-ecr] - strategy: - fail-fast: false - max-parallel: 3 - matrix: - flavour: - - "datascience-notebook" - - "allspark-notebook" - - "oracle-datascience-notebook" - env: - REPOSITORY: ${{ matrix.flavour }} - ECR_REPOSITORY: ${{ matrix.flavour }} - needs: [mdl, yamllint] - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-region: eu-west-1 - role-to-assume: arn:aws:iam::593291632749:role/github-actions-management-ecr - role-duration-seconds: 1200 - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - with: - registries: 593291632749 - - name: Check out code - uses: actions/checkout@v2 - - name: Prep Tags - id: prep - run: | - TAG=noop - if [[ $GITHUB_REF == refs/tags/* ]]; then - TAG=${GITHUB_REF#refs/tags/} - elif [[ $GITHUB_REF == refs/heads/* ]]; then - TAG=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') - if [ "${{ github.event.repository.default_branch }}" = "$TAG" ]; then - TAG=edge - fi - elif [[ $GITHUB_REF == refs/pull/* ]]; then - TAG=pr-${{ github.event.number }} - elif [ "${{ github.event_name }}" = "push" ]; then - TAG="sha-${GITHUB_SHA::8}" - fi - - echo "Docker image tag = '$TAG'" - - echo ::set-output name=tag::${TAG} - echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') - - - name: Build image - working-directory: "./${{ matrix.flavour }}" - run: make build - env: - NETWORK: host - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} - - name: Install InSpec - uses: actionshub/chef-install@main - with: - channel: current - project: inspec - - name: Test - working-directory: "./${{ matrix.flavour }}" - run: make test - env: - NETWORK: host - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} - - name: Push image - working-directory: "./${{ matrix.flavour }}" - run: make push - env: - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} - - name: Cleanup - if: ${{ always() }} - working-directory: "./${{ matrix.flavour }}" - run: make clean - env: - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..64c77ba --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,46 @@ +--- +name: Publish + +on: + push: + tags: + - "v*" + 
+permissions: {} # yamllint disable-line + +jobs: + publish: + name: Publish + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + packages: write + strategy: + fail-fast: false + max-parallel: 3 + matrix: + flavour: + - "allspark-notebook" + - "datascience-notebook" + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Log in to GitHub Container Registry + id: login_ghcr + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and Push + id: build_and_push + uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0 + with: + context: ${{ matrix.flavour }} + file: ${{ matrix.flavour }}/Dockerfile + push: true + tags: ghcr.io/ministryofjustice/analytical-platform-${{ matrix.flavour }}:${{ github.ref_name }} diff --git a/allspark-notebook/Dockerfile b/allspark-notebook/Dockerfile index c90ffa9..2961154 100644 --- a/allspark-notebook/Dockerfile +++ b/allspark-notebook/Dockerfile @@ -1,57 +1,35 @@ -FROM jupyter/all-spark-notebook:spark-3.1.1@sha256:b73dad39ad5c469a92764e38d7cc4321040d3fedddcad7fcebc4ddc7f9c15ff2 +# lab-4.0.11 +FROM quay.io/jupyter/all-spark-notebook@sha256:a63b0faed54bc21d17a4691d8fae177dd95236e0adddbd9d43ee448dc2d5ba1e -LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk +LABEL org.opencontainers.image.vendor="Ministry of Justice" \ + org.opencontainers.image.authors="Analytical Platform" \ + org.opencontainers.image.title="Jupyter All Spark Notebook" \ + maintainer="analytics-platform-tech@digital.justice.gov.uk" -ENV PATH=$PATH:$HOME/.local/bin +ENV PATH="${PATH}:${HOME}/.local/bin" \ + CHOWN_HOME="no" \ + PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" -# Home directory contents is already owned by UID 1000 -ENV CHOWN_HOME=no - -# NB these are sensible defaults but may need to be changed programatically for -# non local spark (ie. EMR etc.) -ENV PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" - -# Container must be run as root to use NB_UID USER root -# Install OS pacakges -# -# The reason we have installed these has been lost. Including just in case. 
-# -# - gdal-bin -# - libspatialindex-dev -# - openssh-client -# -RUN apt-get update && \ - apt-get install -y \ - gdal-bin \ - libspatialindex-dev \ - openssh-client && \ - rm -rf /var/lib/apt/lists/* - -# I'm not sure this has any effect +RUN apt-get update --yes \ + && apt-get install --yes \ + gdal-bin \ + libspatialindex-dev \ + openssh-client \ + && apt-get clean --yes \ + && rm -rf /var/lib/apt/lists/* \ + && pip install --no-cache-dir --upgrade \ + pip \ + boto3 \ + nbstripout \ + s3fs==2023.12.2 \ + dataengineeringutils3==1.4.3 \ + etl-manager==7.6.0 \ + && conda install --yes \ + nbstripout \ + && nbstripout --install --system \ + && update-alternatives --set editor /bin/nano-tiny + +COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/add-user-to-group.sh COPY files/hdfs-site.xml /usr/local/spark/conf/hdfs-site.xml - -# add-user-to-group.sh adds the $NB_USER to group 50 (staff) used by RStudio -COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/ - -# Install python packages -# - pip - python package manager -# - boto3 - python AWS library -# - nbstripout - tool for stripping sensitive data out of notebooks -# -RUN pip install --upgrade \ - pip \ - boto3 \ - nbstripout \ - "s3fs<=0.4" \ - dataengineeringutils3==1.3.0 \ - etl-manager==7.4.0 - -RUN conda install --yes \ - 'nbstripout' - -RUN nbstripout --install --system - -# Vi just doesn't cut it for some people -RUN update-alternatives --set editor /bin/nano-tiny diff --git a/allspark-notebook/Dockerfile.tests b/allspark-notebook/Dockerfile.tests deleted file mode 100644 index 65c837f..0000000 --- a/allspark-notebook/Dockerfile.tests +++ /dev/null @@ -1,4 +0,0 @@ -FROM bash - -COPY tests /tests -COPY files /files diff --git a/allspark-notebook/Makefile b/allspark-notebook/Makefile deleted file mode 100644 index 810f421..0000000 --- a/allspark-notebook/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -SHELL = '/bin/bash' -export IMAGE_TAG ?= local -export BUILD_TAG ?= latest -export DOCKER_BUILDKIT=1 -export REPOSITORY=allspark-notebook -export REGISTRY?=mojanalytics -export NETWORK?=default -export CHEF_LICENSE=accept-no-persist - -.PHONY: build test pull push inspec up clean - -pull: - docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -build: - docker-compose build tests - docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} . - -push: - docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -test: clean up - echo Testing Container Version: ${IMAGE_TAG} - docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1 - -clean: - docker-compose down --volumes --remove-orphans - docker-compose --project-name ${REPOSITORY} down --remove-orphans - -up: - docker-compose --project-name ${REPOSITORY} up -d tests test diff --git a/allspark-notebook/docker-compose.yml b/allspark-notebook/docker-compose.yml deleted file mode 100644 index 60e5b67..0000000 --- a/allspark-notebook/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -version: "3.7" - -services: - tests: - build: - context: . 
- dockerfile: Dockerfile.tests - network: ${NETWORK:-default} - volumes: - - tests:/tests - test: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} - network_mode: ${NETWORK:-default} - command: - - "/usr/local/bin/start-notebook.sh" - - "--NotebookApp.token=''" - ports: [8888:8888] - environment: [JUPYTER_ENABLE_LAB=true] - volumes: - - tests:/share/tests - inspec: - image: chef/inspec:current - network_mode: ${NETWORK:-default} - environment: - CHEF_LICENSE: accept-no-persist - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - tests:/share/tests:ro - -volumes: - tests: diff --git a/allspark-notebook/files/pyspark-s3.py b/allspark-notebook/files/pyspark-s3.py deleted file mode 100644 index 74c7f9a..0000000 --- a/allspark-notebook/files/pyspark-s3.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python - -import pyspark -sc = pyspark.SparkContext("local[*]") - -hadoopConf = sc._jsc.hadoopConfiguration() -hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") diff --git a/allspark-notebook/test/container-structure-test.yml b/allspark-notebook/test/container-structure-test.yml new file mode 100644 index 0000000..63c37cc --- /dev/null +++ b/allspark-notebook/test/container-structure-test.yml @@ -0,0 +1,41 @@ +--- +schemaVersion: 2.0.0 + +containerRunOptions: + user: "jovyan" + +commandTests: + - name: "whoami" + command: "whoami" + expectedOutput: ["jovyan"] + + - name: "user" + command: "id" + args: ["--user", "jovyan"] + expectedOutput: ["1000"] + + - name: "groups" + command: "id" + args: ["--groups", "jovyan"] + expectedOutput: ["100"] + + - name: "conda" + command: "conda" + args: ["info"] + expectedOutput: [".*active environment.*"] + + - name: "nano" + command: "nano" + args: ["--version"] + expectedOutput: ["GNU nano.*"] + + - name: "pip" + command: "pip" + args: ["--version"] + expectedOutput: ["pip.*"] + + - name: "install osmnx" + command: "pip" + args: ["install", "osmnx"] + exitCode: 0 + expectedOutput: ["Successfully installed.*"] diff --git a/allspark-notebook/tests/controls/conda_spec.rb b/allspark-notebook/tests/controls/conda_spec.rb deleted file mode 100644 index c4fa936..0000000 --- a/allspark-notebook/tests/controls/conda_spec.rb +++ /dev/null @@ -1,15 +0,0 @@ -title 'Working Conda' - -control 'Conda available' do - impact 'high' - title 'Conda installer should be available to use' - desc 'The Conda installer is not preferred, but is the only way to install some packages.' 
- tag 'installer' - tag 'conda' - - describe command('conda info') do - its('exit_status') { should eq 0 } - its('stdout') { should match /conda/ } - end -end - diff --git a/allspark-notebook/tests/controls/nano_spec.rb b/allspark-notebook/tests/controls/nano_spec.rb deleted file mode 100644 index 1a974f0..0000000 --- a/allspark-notebook/tests/controls/nano_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -title 'nano is available and default editor' - -control 'nano available and default editor' do - impact 'low' - title 'nano is default editor' - desc 'nano is a simple editor, it should be the default one' - tag 'nano' - tag 'editor' - - describe command('nano --version') do - its('exit_status') { should eq 0 } - end - - describe command('editor --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /GNU nano, version/ } - end -end - diff --git a/allspark-notebook/tests/controls/pip_spec.rb b/allspark-notebook/tests/controls/pip_spec.rb deleted file mode 100644 index 81d46b4..0000000 --- a/allspark-notebook/tests/controls/pip_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -title 'Working Pip' - -control 'Pip available' do - impact 'high' - title 'Pip should be available to use' - desc 'Pip is the preferred installer for python packages.' - tag 'installer' - tag 'pip' - - describe command('pip --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /pip 2/ } - end -end - -control 'osmnx' do - impact 'high' - title 'Install osmnx' - desc 'Data scientists should be able top use osmnx' \ - ' OSMX is often installed, but difficult to install with system pacakges' \ - ' and rtree needing installing, which often break without lots of debugging.' - tag 'installer' - tag 'pip' - - describe command('pip install osmnx') do - its('exit_status') { should eq 0 } - its('stdout') { should match /Successfully installed/ } - end -end diff --git a/allspark-notebook/tests/controls/user_spec.rb b/allspark-notebook/tests/controls/user_spec.rb deleted file mode 100644 index feca701..0000000 --- a/allspark-notebook/tests/controls/user_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -title 'Jovyan User' - -control 'Common Users' do - impact 'high' - title 'The jovyan user should exist' - desc 'The joyvan user should exist and should have a UID of 1000' - tag 'user' - tag 'group' - - describe user('jovyan') do - it { should exist } - its('uid') { should eq 1000 } - end -end - -control 'Common Groups' do - impact 'high' - title 'The joyvan user should have the corect groups' - desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image' - - describe user('jovyan') do - its('gid') { should eq 100 } - its('groups') { should eq ['users', 'staff']} - end -end diff --git a/allspark-notebook/tests/files/pandas_read_homedir.py b/allspark-notebook/tests/files/pandas_read_homedir.py deleted file mode 100644 index c2f04c5..0000000 --- a/allspark-notebook/tests/files/pandas_read_homedir.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("path/to/home_dir") diff --git a/allspark-notebook/tests/files/pandas_read_s3.py b/allspark-notebook/tests/files/pandas_read_s3.py deleted file mode 100644 index 405e54d..0000000 --- a/allspark-notebook/tests/files/pandas_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("s3://blah") diff --git a/allspark-notebook/tests/files/spark_read_s3.py b/allspark-notebook/tests/files/spark_read_s3.py deleted file mode 100644 index f2171ee..0000000 --- 
a/allspark-notebook/tests/files/spark_read_s3.py +++ /dev/null @@ -1,8 +0,0 @@ -from pyspark.context import SparkContext -from pyspark.sql import SparkSession - -sc = SparkContext.getOrCreate() -spark = SparkSession(sc) - -df = spark.read.csv("s3a://bucket/path/to/file.csv") -df.limit(10).show() diff --git a/allspark-notebook/tests/inspec.lock b/allspark-notebook/tests/inspec.lock deleted file mode 100644 index e687b9b..0000000 --- a/allspark-notebook/tests/inspec.lock +++ /dev/null @@ -1,3 +0,0 @@ ---- -lockfile_version: 1 -depends: [] diff --git a/allspark-notebook/tests/inspec.yml b/allspark-notebook/tests/inspec.yml deleted file mode 100644 index b4fcaa6..0000000 --- a/allspark-notebook/tests/inspec.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: Allspark Notebook -title: Allspark Notebook -maintainer: Analytical Platform -copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice) -copyright_email: "analytics-platform-tech@digital.justice.gov.uk" -license: MIT -summary: An InSpec Compliance Profile for Analytical Platform Allspark Notebook -version: 0.1.0 diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index e962ee7..a262ee2 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -1,36 +1,34 @@ -FROM jupyter/datascience-notebook:lab-3.1.11 +# lab-4.0.11 +FROM quay.io/jupyter/datascience-notebook@sha256:76148e403aa44017f59b1dd0861d91daae800c7f86e9f39138b9d2703b885082 -LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk +LABEL org.opencontainers.image.vendor="Ministry of Justice" \ + org.opencontainers.image.authors="Analytical Platform" \ + org.opencontainers.image.title="Jupyter Datascience Notebook" \ + maintainer="analytics-platform-tech@digital.justice.gov.uk" -ENV PATH=$PATH:$HOME/.local/bin - -# Home directory contents is already owned by UID 1000 -ENV CHOWN_HOME=no +ENV PATH="${PATH}:${HOME}/.local/bin" \ + CHOWN_HOME="no" USER root -RUN apt-get update \ - && apt-get install -y \ - ca-certificates-java \ - openjdk-8-jdk \ - openssh-client \ - software-properties-common \ - gdal-bin \ - libspatialindex-dev \ - && rm -rf /var/lib/apt/lists/* - -RUN update-alternatives --set editor /bin/nano-tiny - -RUN pip install --upgrade \ - boto3 \ - black \ - nbstripout \ - rtree \ - "s3fs<=0.4" - -RUN pip install --upgrade jupyterlab-git - -RUN conda install --yes \ - 'nbstripout' - -RUN nbstripout --install --system +RUN apt-get update --yes \ + && apt-get install --yes \ + ca-certificates-java \ + openjdk-8-jdk \ + openssh-client \ + software-properties-common \ + gdal-bin \ + libspatialindex-dev \ + && apt-get clean --yes \ + && rm -rf /var/lib/apt/lists/* \ + && pip install --no-cache-dir --upgrade \ + boto3 \ + black \ + nbstripout \ + rtree \ + s3fs==2023.12.2 \ + jupyterlab-git \ + && conda install --yes \ + nbstripout \ + && nbstripout --install --system \ + && update-alternatives --set editor /bin/nano-tiny diff --git a/datascience-notebook/Dockerfile.tests b/datascience-notebook/Dockerfile.tests deleted file mode 100644 index 65c837f..0000000 --- a/datascience-notebook/Dockerfile.tests +++ /dev/null @@ -1,4 +0,0 @@ -FROM bash - -COPY tests /tests -COPY files /files diff --git a/datascience-notebook/Makefile b/datascience-notebook/Makefile deleted file mode 100644 index a8b8019..0000000 --- a/datascience-notebook/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -SHELL = '/bin/bash' -export IMAGE_TAG ?= local -export BUILD_TAG ?= latest -export DOCKER_BUILDKIT=1 -export REPOSITORY=datascience-notebook -export 
REGISTRY?=mojanalytics -export NETWORK?=default -export CHEF_LICENSE=accept-no-persist - -.PHONY: build test pull push inspec up clean - -pull: - docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -build: - docker-compose build tests - docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} . - -push: - docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -test: clean up - echo Testing Container Version: ${IMAGE_TAG} - docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1 - -clean: - docker-compose down --volumes --remove-orphans - docker-compose --project-name ${REPOSITORY} down --volumes - -up: - docker-compose --project-name ${REPOSITORY} up -d tests test - -enter: - docker-compose --project-name ${REPOSITORY} run test bash diff --git a/datascience-notebook/docker-compose.yml b/datascience-notebook/docker-compose.yml deleted file mode 100644 index 60e5b67..0000000 --- a/datascience-notebook/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -version: "3.7" - -services: - tests: - build: - context: . - dockerfile: Dockerfile.tests - network: ${NETWORK:-default} - volumes: - - tests:/tests - test: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} - network_mode: ${NETWORK:-default} - command: - - "/usr/local/bin/start-notebook.sh" - - "--NotebookApp.token=''" - ports: [8888:8888] - environment: [JUPYTER_ENABLE_LAB=true] - volumes: - - tests:/share/tests - inspec: - image: chef/inspec:current - network_mode: ${NETWORK:-default} - environment: - CHEF_LICENSE: accept-no-persist - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - tests:/share/tests:ro - -volumes: - tests: diff --git a/datascience-notebook/test/container-structure-test.yml b/datascience-notebook/test/container-structure-test.yml new file mode 100644 index 0000000..deb39e2 --- /dev/null +++ b/datascience-notebook/test/container-structure-test.yml @@ -0,0 +1,46 @@ +--- +schemaVersion: 2.0.0 + +containerRunOptions: + user: "jovyan" + +commandTests: + - name: "whoami" + command: "whoami" + expectedOutput: ["jovyan"] + + - name: "user" + command: "id" + args: ["--user", "jovyan"] + expectedOutput: ["1000"] + + - name: "groups" + command: "id" + args: ["--groups", "jovyan"] + expectedOutput: ["100"] + + - name: "conda" + command: "conda" + args: ["info"] + expectedOutput: [".*active environment.*"] + + - name: "juptyer-lab" + command: "jupyter-lab" + args: ["--version"] + expectedOutput: ["4.0.11"] + + - name: "nano" + command: "nano" + args: ["--version"] + expectedOutput: ["GNU nano.*"] + + - name: "pip" + command: "pip" + args: ["--version"] + expectedOutput: ["pip.*"] + + - name: "install osmnx" + command: "pip" + args: ["install", "osmnx"] + exitCode: 0 + expectedOutput: ["Successfully installed.*"] diff --git a/datascience-notebook/tests/controls/conda_spec.rb b/datascience-notebook/tests/controls/conda_spec.rb deleted file mode 100644 index c646439..0000000 --- a/datascience-notebook/tests/controls/conda_spec.rb +++ /dev/null @@ -1,14 +0,0 @@ -title 'Working Conda' - -control 'Conda available' do - impact 'high' - title 'Conda installer should be available to use' - desc 'The Conda installer is not preferred, but is the only way to install some packages.' 
- tag 'installer' - tag 'conda' - - describe command('conda info') do - its('exit_status') { should eq 0 } - its('stdout') { should match /conda/ } - end -end diff --git a/datascience-notebook/tests/controls/jupyter_lab_spec.rb b/datascience-notebook/tests/controls/jupyter_lab_spec.rb deleted file mode 100644 index 355a755..0000000 --- a/datascience-notebook/tests/controls/jupyter_lab_spec.rb +++ /dev/null @@ -1,13 +0,0 @@ -title 'Jupyter Lab' - -control 'JupyerLab version' do - impact 'high' - title 'JupyterLab verioon' - desc 'JupyerLab should be the correct verion' - tag 'JupyterLab' - - describe command('jupyter-lab --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /3.1.11/ } - end -end diff --git a/datascience-notebook/tests/controls/nano_spec.rb b/datascience-notebook/tests/controls/nano_spec.rb deleted file mode 100644 index 1a974f0..0000000 --- a/datascience-notebook/tests/controls/nano_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -title 'nano is available and default editor' - -control 'nano available and default editor' do - impact 'low' - title 'nano is default editor' - desc 'nano is a simple editor, it should be the default one' - tag 'nano' - tag 'editor' - - describe command('nano --version') do - its('exit_status') { should eq 0 } - end - - describe command('editor --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /GNU nano, version/ } - end -end - diff --git a/datascience-notebook/tests/controls/pip_spec.rb b/datascience-notebook/tests/controls/pip_spec.rb deleted file mode 100644 index 6878cea..0000000 --- a/datascience-notebook/tests/controls/pip_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -title 'Working Pip' - -control 'Pip available' do - impact 'high' - title 'Pip should be available to use' - desc 'Pip is the preferred installer for python packages.' - tag 'installer' - tag 'pip' - - describe command('pip --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /pip 21/ } - end -end - -control 'osmnx' do - impact 'high' - title 'Install osmnx' - desc 'Data scientists should be able top use osmnx' \ - ' OSMX is often installed, but difficult to install with system pacakges' \ - ' and rtree needing installing, which often break without lots of debugging.' 
- tag 'installer' - tag 'pip' - - describe command('pip install osmnx') do - its('exit_status') { should eq 0 } - its('stdout') { should match /Successfully installed/ } - end -end diff --git a/datascience-notebook/tests/controls/user_spec.rb b/datascience-notebook/tests/controls/user_spec.rb deleted file mode 100644 index ec293b6..0000000 --- a/datascience-notebook/tests/controls/user_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -title 'Jovyan User' - -control 'Common Users' do - impact 'high' - title 'The jovyan user should exist' - desc 'The joyvan user should exist This makes sure that it has the same UID & GID as the rstudio images' - tag 'user' - tag 'group' - - describe user('jovyan') do - it { should exist } - its('uid') { should eq 1000 } - end -end - -#control 'Common Groups' do -# impact 'high' -# title 'The joyvan user should have the corect groups' -# desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image' -# -# describe user('jovyan') do -# its('gid') { should eq 100 } -# its('groups') { should eq ['users', 'staff']} -# end -#end diff --git a/datascience-notebook/tests/files/pandas_read_homedir.py b/datascience-notebook/tests/files/pandas_read_homedir.py deleted file mode 100644 index c2f04c5..0000000 --- a/datascience-notebook/tests/files/pandas_read_homedir.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("path/to/home_dir") diff --git a/datascience-notebook/tests/files/pandas_read_s3.py b/datascience-notebook/tests/files/pandas_read_s3.py deleted file mode 100644 index 405e54d..0000000 --- a/datascience-notebook/tests/files/pandas_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("s3://blah") diff --git a/datascience-notebook/tests/files/spark_read_s3.py b/datascience-notebook/tests/files/spark_read_s3.py deleted file mode 100644 index 25e026a..0000000 --- a/datascience-notebook/tests/files/spark_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ - - -spark.read_parquet() diff --git a/datascience-notebook/tests/inspec.lock b/datascience-notebook/tests/inspec.lock deleted file mode 100644 index e687b9b..0000000 --- a/datascience-notebook/tests/inspec.lock +++ /dev/null @@ -1,3 +0,0 @@ ---- -lockfile_version: 1 -depends: [] diff --git a/datascience-notebook/tests/inspec.yml b/datascience-notebook/tests/inspec.yml deleted file mode 100644 index 0256e1c..0000000 --- a/datascience-notebook/tests/inspec.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: Datascience Notebook -title: Datascience Notebook -maintainer: Analytical Platform -copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice) -copyright_email: "analytics-platform-tech@digital.justice.gov.uk" -license: MIT -summary: An InSpec Compliance Profile for Analytical Platform Jupyter Datascience Notebook -version: 0.1.0 diff --git a/files/hdfs-site.xml b/files/hdfs-site.xml deleted file mode 100644 index 617f159..0000000 --- a/files/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - fs.s3a.impl - org.apache.hadoop.fs.s3a.S3AFileSystem - - diff --git a/files/pyspark-s3.py b/files/pyspark-s3.py deleted file mode 100644 index da6f313..0000000 --- a/files/pyspark-s3.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python - -import os -os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1 pyspark-shell' - -import pyspark -sc = pyspark.SparkContext("local[*]") - -from pyspark.sql import SQLContext -sqlContext = SQLContext(sc) - -hadoopConf = 
sc._jsc.hadoopConfiguration() -hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") diff --git a/oracle-datascience-notebook/Dockerfile b/oracle-datascience-notebook/Dockerfile deleted file mode 100644 index eb79c8a..0000000 --- a/oracle-datascience-notebook/Dockerfile +++ /dev/null @@ -1,60 +0,0 @@ -FROM jupyter/datascience-notebook:76402a27fd13 - -LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk - -USER root - -ENV PATH=$PATH:$HOME/.local/bin \ - CHOWN_HOME=no \ - ORACLE_HOME=/opt/oracle/instantclient_19_6 \ - LD_LIBRARY_PATH=$ORACLE_HOME:$LD_LIBRARY_PATH \ - PATH=$PATH:$ORACLE_HOME \ - INSTANT_CLIENT_VERSION="19.6.0.0" - -RUN apt-get update \ - && apt-get install -y \ - ca-certificates-java \ - openjdk-8-jdk \ - openssh-client \ - software-properties-common \ - gdal-bin \ - libspatialindex-dev \ - libaio1 \ - && rm -rf /var/lib/apt/lists/* - -RUN update-alternatives --set editor /bin/nano \ - && usermod -a -G "staff,users" "${NB_USER}" - -# Install Oracle Instant Client and SQL*Plus -# See: https://www.oracle.com/uk/database/technologies/instant-client/linux-x86-64-downloads.html#ic_x64_inst -RUN mkdir /opt/oracle \ - && cd /opt/oracle \ - && curl -sO https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && curl -sO https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && unzip instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && unzip instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && rm instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip - -RUN sh -c "echo /opt/oracle/instantclient_19_6 > /etc/ld.so.conf.d/oracle-instantclient.conf" \ - && ldconfig - -USER $NB_USER -RUN pip install --upgrade \ - boto3 \ - black \ - nbstripout \ - rtree \ - s3fs - -ENV JUPYTERLAB_DIR="/home/jovyan/.jupyter" -RUN jupyter labextension install \ - @jupyter-widgets/jupyterlab-manager \ - @jupyterlab/github \ - @jupyterlab/git - -RUN pip install flake8 \ - && jupyter labextension install jupyterlab-flake8 - -RUN pip install jupyterlab_code_formatter==1.3.8 \ - && jupyter labextension install @ryantam626/jupyterlab_code_formatter@v1.3.8 \ - && jupyter serverextension enable --user --py jupyterlab_code_formatter diff --git a/oracle-datascience-notebook/Dockerfile.tests b/oracle-datascience-notebook/Dockerfile.tests deleted file mode 100644 index 65c837f..0000000 --- a/oracle-datascience-notebook/Dockerfile.tests +++ /dev/null @@ -1,4 +0,0 @@ -FROM bash - -COPY tests /tests -COPY files /files diff --git a/oracle-datascience-notebook/Makefile b/oracle-datascience-notebook/Makefile deleted file mode 100644 index 73589b5..0000000 --- a/oracle-datascience-notebook/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -SHELL = '/bin/bash' -export IMAGE_TAG ?= local -export BUILD_TAG ?= latest -export DOCKER_BUILDKIT=1 -export REPOSITORY=oracle-datascience-notebook -export REGISTRY?=mojanalytics -export NETWORK?=default -export CHEF_LICENSE=accept-no-persist - -.PHONY: build test pull push inspec up clean - -pull: - docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -build: - docker-compose build tests - docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} . 
- -push: - docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -test: clean up - echo Testing Container Version: ${IMAGE_TAG} - docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1 - -clean: - docker-compose down --volumes --remove-orphans - docker-compose --project-name ${REPOSITORY} down --volumes - -up: - docker-compose --project-name ${REPOSITORY} up -d tests test diff --git a/oracle-datascience-notebook/docker-compose.yml b/oracle-datascience-notebook/docker-compose.yml deleted file mode 100644 index 60e5b67..0000000 --- a/oracle-datascience-notebook/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -version: "3.7" - -services: - tests: - build: - context: . - dockerfile: Dockerfile.tests - network: ${NETWORK:-default} - volumes: - - tests:/tests - test: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} - network_mode: ${NETWORK:-default} - command: - - "/usr/local/bin/start-notebook.sh" - - "--NotebookApp.token=''" - ports: [8888:8888] - environment: [JUPYTER_ENABLE_LAB=true] - volumes: - - tests:/share/tests - inspec: - image: chef/inspec:current - network_mode: ${NETWORK:-default} - environment: - CHEF_LICENSE: accept-no-persist - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - tests:/share/tests:ro - -volumes: - tests: diff --git a/oracle-datascience-notebook/files/hdfs-site.xml b/oracle-datascience-notebook/files/hdfs-site.xml deleted file mode 100644 index 617f159..0000000 --- a/oracle-datascience-notebook/files/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - fs.s3a.impl - org.apache.hadoop.fs.s3a.S3AFileSystem - - diff --git a/oracle-datascience-notebook/files/pyspark-s3.py b/oracle-datascience-notebook/files/pyspark-s3.py deleted file mode 100644 index da6f313..0000000 --- a/oracle-datascience-notebook/files/pyspark-s3.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python - -import os -os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1 pyspark-shell' - -import pyspark -sc = pyspark.SparkContext("local[*]") - -from pyspark.sql import SQLContext -sqlContext = SQLContext(sc) - -hadoopConf = sc._jsc.hadoopConfiguration() -hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") diff --git a/oracle-datascience-notebook/tests/controls/conda_spec.rb b/oracle-datascience-notebook/tests/controls/conda_spec.rb deleted file mode 100644 index c4fa936..0000000 --- a/oracle-datascience-notebook/tests/controls/conda_spec.rb +++ /dev/null @@ -1,15 +0,0 @@ -title 'Working Conda' - -control 'Conda available' do - impact 'high' - title 'Conda installer should be available to use' - desc 'The Conda installer is not preferred, but is the only way to install some packages.' 
- tag 'installer' - tag 'conda' - - describe command('conda info') do - its('exit_status') { should eq 0 } - its('stdout') { should match /conda/ } - end -end - diff --git a/oracle-datascience-notebook/tests/controls/nano_spec.rb b/oracle-datascience-notebook/tests/controls/nano_spec.rb deleted file mode 100644 index 1a974f0..0000000 --- a/oracle-datascience-notebook/tests/controls/nano_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -title 'nano is available and default editor' - -control 'nano available and default editor' do - impact 'low' - title 'nano is default editor' - desc 'nano is a simple editor, it should be the default one' - tag 'nano' - tag 'editor' - - describe command('nano --version') do - its('exit_status') { should eq 0 } - end - - describe command('editor --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /GNU nano, version/ } - end -end - diff --git a/oracle-datascience-notebook/tests/controls/pip_spec.rb b/oracle-datascience-notebook/tests/controls/pip_spec.rb deleted file mode 100644 index eb31df1..0000000 --- a/oracle-datascience-notebook/tests/controls/pip_spec.rb +++ /dev/null @@ -1,14 +0,0 @@ -title 'Working Pip' - -control 'Pip available' do - impact 'high' - title 'Pip should be available to use' - desc 'Pip is the preferred installer for python packages.' - tag 'installer' - tag 'pip' - - describe command('pip --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /pip 20/ } - end -end diff --git a/oracle-datascience-notebook/tests/controls/user_spec.rb b/oracle-datascience-notebook/tests/controls/user_spec.rb deleted file mode 100644 index f3a8555..0000000 --- a/oracle-datascience-notebook/tests/controls/user_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -title 'Jovyan User' - -control 'Common Users' do - impact 'high' - title 'The jovyan user should exist' - desc 'The joyvan user should exist This makes sure that it has the same UID & GID as the rstudio images' - tag 'user' - tag 'group' - - describe user('jovyan') do - it { should exist } - its('uid') { should eq 1000 } - end -end - -control 'Common Groups' do - impact 'high' - title 'The joyvan user should have the corect groups' - desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image' - - describe user('jovyan') do - its('gid') { should eq 100 } - its('groups') { should eq ['users', 'staff']} - end -end diff --git a/oracle-datascience-notebook/tests/files/pandas_read_homedir.py b/oracle-datascience-notebook/tests/files/pandas_read_homedir.py deleted file mode 100644 index c2f04c5..0000000 --- a/oracle-datascience-notebook/tests/files/pandas_read_homedir.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("path/to/home_dir") diff --git a/oracle-datascience-notebook/tests/files/pandas_read_s3.py b/oracle-datascience-notebook/tests/files/pandas_read_s3.py deleted file mode 100644 index 405e54d..0000000 --- a/oracle-datascience-notebook/tests/files/pandas_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("s3://blah") diff --git a/oracle-datascience-notebook/tests/files/spark_read_s3.py b/oracle-datascience-notebook/tests/files/spark_read_s3.py deleted file mode 100644 index 25e026a..0000000 --- a/oracle-datascience-notebook/tests/files/spark_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ - - -spark.read_parquet() diff --git a/oracle-datascience-notebook/tests/inspec.lock b/oracle-datascience-notebook/tests/inspec.lock deleted file mode 100644 index 
e687b9b..0000000 --- a/oracle-datascience-notebook/tests/inspec.lock +++ /dev/null @@ -1,3 +0,0 @@ ---- -lockfile_version: 1 -depends: [] diff --git a/oracle-datascience-notebook/tests/inspec.yml b/oracle-datascience-notebook/tests/inspec.yml deleted file mode 100644 index 1bd8f79..0000000 --- a/oracle-datascience-notebook/tests/inspec.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: Oracle Datascience Notebook -title: Oracle Datascience Notebook -maintainer: Analytical Platform -copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice) -copyright_email: "analytics-platform-tech@digital.justice.gov.uk" -license: MIT -summary: An InSpec Compliance Profile for Analytical Platform Jupyter Datascience Notebook -version: 0.1.0 diff --git a/scripts/build-and-test.sh b/scripts/build-and-test.sh new file mode 100644 index 0000000..f0f9db7 --- /dev/null +++ b/scripts/build-and-test.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +IMAGE="${1}" +IMAGE_TAG="analytical-platform.service.justice.gov.uk/${IMAGE}:local" +CONTAINER_STRUCTURE_TEST_IMAGE="gcr.io/gcp-runtimes/container-structure-test:latest" + +if [[ "${REMOTE_CONTAINERS}" ]] && [[ "$(uname -m)" == "aarch64" ]]; then + echo "(⚠) Looks like you're running in a dev container on Apple Silicon." + echo "(⚠) This script builds linux/amd64 images which might take a long time or even fail." + export PLATFORM_FLAG="--platform linux/amd64" +fi + +echo "Building [ ${IMAGE} ] as [ ${IMAGE_TAG} ]" + +docker build ${PLATFORM_FLAG} --file "${IMAGE}/Dockerfile" --tag "${IMAGE_TAG}" "${IMAGE}" + +if [[ -f "${IMAGE}/test/container-structure-test.yml" ]]; then + echo "Running container structure test for [ ${IMAGE_TAG} ]" + + docker run --rm ${PLATFORM_FLAG} \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + --volume "${PWD}:/workspace" \ + --workdir /workspace \ + "${CONTAINER_STRUCTURE_TEST_IMAGE}" \ + test --image "${IMAGE_TAG}" --config "/workspace/${IMAGE}/test/container-structure-test.yml" +fi diff --git a/datascience-notebook/files/hdfs-site.xml b/src/files/hdfs-site.xml similarity index 100% rename from datascience-notebook/files/hdfs-site.xml rename to src/files/hdfs-site.xml diff --git a/datascience-notebook/files/pyspark-s3.py b/src/files/pyspark-s3.py similarity index 100% rename from datascience-notebook/files/pyspark-s3.py rename to src/files/pyspark-s3.py
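For local testing, the new helper script introduced by this patch can be invoked directly; a minimal sketch, assuming Docker is available and the command is run from the repository root (the flavour name below is just one of the two entries in the build matrix, allspark-notebook works the same way):

    # Build the image tagged analytical-platform.service.justice.gov.uk/<flavour>:local
    # and, if <flavour>/test/container-structure-test.yml exists, run container-structure-test against it
    bash scripts/build-and-test.sh datascience-notebook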