From 26a21c062ee79e2921fd41e5beb22710a57ccfef Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 29 Jan 2024 17:22:18 +0000 Subject: [PATCH] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Upgrade=20Jupyter=20image?= =?UTF-8?q?=20(#76)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jacob Woffenden --- .github/workflows/build-and-test.yml | 62 ++++++++++ .../workflows/jupyter-lab-test-and-build.yml | 108 ------------------ .github/workflows/publish.yml | 46 ++++++++ allspark-notebook/Dockerfile | 80 +++++-------- allspark-notebook/Dockerfile.tests | 4 - allspark-notebook/Makefile | 31 ----- allspark-notebook/docker-compose.yml | 32 ------ allspark-notebook/files/pyspark-s3.py | 7 -- .../test/container-structure-test.yml | 41 +++++++ .../tests/controls/conda_spec.rb | 15 --- allspark-notebook/tests/controls/nano_spec.rb | 19 --- allspark-notebook/tests/controls/pip_spec.rb | 29 ----- allspark-notebook/tests/controls/user_spec.rb | 25 ---- .../tests/files/pandas_read_homedir.py | 3 - .../tests/files/pandas_read_s3.py | 3 - .../tests/files/spark_read_s3.py | 8 -- allspark-notebook/tests/inspec.lock | 3 - allspark-notebook/tests/inspec.yml | 9 -- datascience-notebook/Dockerfile | 60 +++++----- datascience-notebook/Dockerfile.tests | 4 - datascience-notebook/Makefile | 34 ------ datascience-notebook/docker-compose.yml | 32 ------ .../test/container-structure-test.yml | 46 ++++++++ .../tests/controls/conda_spec.rb | 14 --- .../tests/controls/jupyter_lab_spec.rb | 13 --- .../tests/controls/nano_spec.rb | 19 --- .../tests/controls/pip_spec.rb | 29 ----- .../tests/controls/user_spec.rb | 25 ---- .../tests/files/pandas_read_homedir.py | 3 - .../tests/files/pandas_read_s3.py | 3 - .../tests/files/spark_read_s3.py | 3 - datascience-notebook/tests/inspec.lock | 3 - datascience-notebook/tests/inspec.yml | 9 -- files/hdfs-site.xml | 6 - files/pyspark-s3.py | 13 --- oracle-datascience-notebook/Dockerfile | 60 ---------- oracle-datascience-notebook/Dockerfile.tests | 4 - oracle-datascience-notebook/Makefile | 31 ----- .../docker-compose.yml | 32 ------ .../files/hdfs-site.xml | 6 - .../files/pyspark-s3.py | 13 --- .../tests/controls/conda_spec.rb | 15 --- .../tests/controls/nano_spec.rb | 19 --- .../tests/controls/pip_spec.rb | 14 --- .../tests/controls/user_spec.rb | 25 ---- .../tests/files/pandas_read_homedir.py | 3 - .../tests/files/pandas_read_s3.py | 3 - .../tests/files/spark_read_s3.py | 3 - oracle-datascience-notebook/tests/inspec.lock | 3 - oracle-datascience-notebook/tests/inspec.yml | 9 -- scripts/build-and-test.sh | 26 +++++ .../files/hdfs-site.xml | 0 .../files/pyspark-s3.py | 0 53 files changed, 279 insertions(+), 828 deletions(-) create mode 100644 .github/workflows/build-and-test.yml delete mode 100644 .github/workflows/jupyter-lab-test-and-build.yml create mode 100644 .github/workflows/publish.yml delete mode 100644 allspark-notebook/Dockerfile.tests delete mode 100644 allspark-notebook/Makefile delete mode 100644 allspark-notebook/docker-compose.yml delete mode 100644 allspark-notebook/files/pyspark-s3.py create mode 100644 allspark-notebook/test/container-structure-test.yml delete mode 100644 allspark-notebook/tests/controls/conda_spec.rb delete mode 100644 allspark-notebook/tests/controls/nano_spec.rb delete mode 100644 allspark-notebook/tests/controls/pip_spec.rb delete mode 100644 allspark-notebook/tests/controls/user_spec.rb delete mode 100644 allspark-notebook/tests/files/pandas_read_homedir.py delete mode 100644 
allspark-notebook/tests/files/pandas_read_s3.py delete mode 100644 allspark-notebook/tests/files/spark_read_s3.py delete mode 100644 allspark-notebook/tests/inspec.lock delete mode 100644 allspark-notebook/tests/inspec.yml delete mode 100644 datascience-notebook/Dockerfile.tests delete mode 100644 datascience-notebook/Makefile delete mode 100644 datascience-notebook/docker-compose.yml create mode 100644 datascience-notebook/test/container-structure-test.yml delete mode 100644 datascience-notebook/tests/controls/conda_spec.rb delete mode 100644 datascience-notebook/tests/controls/jupyter_lab_spec.rb delete mode 100644 datascience-notebook/tests/controls/nano_spec.rb delete mode 100644 datascience-notebook/tests/controls/pip_spec.rb delete mode 100644 datascience-notebook/tests/controls/user_spec.rb delete mode 100644 datascience-notebook/tests/files/pandas_read_homedir.py delete mode 100644 datascience-notebook/tests/files/pandas_read_s3.py delete mode 100644 datascience-notebook/tests/files/spark_read_s3.py delete mode 100644 datascience-notebook/tests/inspec.lock delete mode 100644 datascience-notebook/tests/inspec.yml delete mode 100644 files/hdfs-site.xml delete mode 100644 files/pyspark-s3.py delete mode 100644 oracle-datascience-notebook/Dockerfile delete mode 100644 oracle-datascience-notebook/Dockerfile.tests delete mode 100644 oracle-datascience-notebook/Makefile delete mode 100644 oracle-datascience-notebook/docker-compose.yml delete mode 100644 oracle-datascience-notebook/files/hdfs-site.xml delete mode 100644 oracle-datascience-notebook/files/pyspark-s3.py delete mode 100644 oracle-datascience-notebook/tests/controls/conda_spec.rb delete mode 100644 oracle-datascience-notebook/tests/controls/nano_spec.rb delete mode 100644 oracle-datascience-notebook/tests/controls/pip_spec.rb delete mode 100644 oracle-datascience-notebook/tests/controls/user_spec.rb delete mode 100644 oracle-datascience-notebook/tests/files/pandas_read_homedir.py delete mode 100644 oracle-datascience-notebook/tests/files/pandas_read_s3.py delete mode 100644 oracle-datascience-notebook/tests/files/spark_read_s3.py delete mode 100644 oracle-datascience-notebook/tests/inspec.lock delete mode 100644 oracle-datascience-notebook/tests/inspec.yml create mode 100644 scripts/build-and-test.sh rename {datascience-notebook => src}/files/hdfs-site.xml (100%) rename {datascience-notebook => src}/files/pyspark-s3.py (100%) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml new file mode 100644 index 0000000..0c5af91 --- /dev/null +++ b/.github/workflows/build-and-test.yml @@ -0,0 +1,62 @@ +--- +name: Test and Build + +on: + pull_request: + branches: + - main + +permissions: {} # yamllint disable-line + +jobs: + yamllint: + name: YAML Lint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Run yamllint + id: run_yamllint + uses: actionshub/yamllint@b772a30c3ba90c5f5aadfe94d8f3599e3a7099c8 # v1.8.2 + + markdownlint: + name: Markdown Lint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Run mdl + id: run_mdl + uses: actionshub/markdownlint@6c82ff529253530dfbf75c37570876c52692835f # v3.1.4 + + build-and-test: + if: github.ref != 'main' + name: Build and Test + runs-on: ubuntu-latest + permissions: + contents: read + 
strategy: + fail-fast: false + max-parallel: 3 + matrix: + flavour: + - "allspark-notebook" + - "datascience-notebook" + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Build and Test + id: build_and_test + shell: bash + run: | + bash scripts/build-and-test.sh "${{ matrix.flavour }}" diff --git a/.github/workflows/jupyter-lab-test-and-build.yml b/.github/workflows/jupyter-lab-test-and-build.yml deleted file mode 100644 index ba12268..0000000 --- a/.github/workflows/jupyter-lab-test-and-build.yml +++ /dev/null @@ -1,108 +0,0 @@ ---- -name: JupyterLab - test and build - -on: - pull_request: - push: - branches: [main] - tags: "*" - -jobs: - yamllint: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@main - - name: Run yaml Lint - uses: actionshub/yamllint@main - - mdl: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@main - - name: Run Markdown Lint - uses: actionshub/markdownlint@main - - docker: - runs-on: [self-hosted, management-ecr] - strategy: - fail-fast: false - max-parallel: 3 - matrix: - flavour: - - "datascience-notebook" - - "allspark-notebook" - - "oracle-datascience-notebook" - env: - REPOSITORY: ${{ matrix.flavour }} - ECR_REPOSITORY: ${{ matrix.flavour }} - needs: [mdl, yamllint] - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-region: eu-west-1 - role-to-assume: arn:aws:iam::593291632749:role/github-actions-management-ecr - role-duration-seconds: 1200 - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - with: - registries: 593291632749 - - name: Check out code - uses: actions/checkout@v2 - - name: Prep Tags - id: prep - run: | - TAG=noop - if [[ $GITHUB_REF == refs/tags/* ]]; then - TAG=${GITHUB_REF#refs/tags/} - elif [[ $GITHUB_REF == refs/heads/* ]]; then - TAG=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') - if [ "${{ github.event.repository.default_branch }}" = "$TAG" ]; then - TAG=edge - fi - elif [[ $GITHUB_REF == refs/pull/* ]]; then - TAG=pr-${{ github.event.number }} - elif [ "${{ github.event_name }}" = "push" ]; then - TAG="sha-${GITHUB_SHA::8}" - fi - - echo "Docker image tag = '$TAG'" - - echo ::set-output name=tag::${TAG} - echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') - - - name: Build image - working-directory: "./${{ matrix.flavour }}" - run: make build - env: - NETWORK: host - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} - - name: Install InSpec - uses: actionshub/chef-install@main - with: - channel: current - project: inspec - - name: Test - working-directory: "./${{ matrix.flavour }}" - run: make test - env: - NETWORK: host - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} - - name: Push image - working-directory: "./${{ matrix.flavour }}" - run: make push - env: - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} - - name: Cleanup - if: ${{ always() }} - working-directory: "./${{ matrix.flavour }}" - run: make clean - env: - REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ steps.prep.outputs.tag }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..64c77ba --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,46 @@ +--- +name: Publish + +on: + push: + tags: + - "v*" + 
+permissions: {} # yamllint disable-line + +jobs: + publish: + name: Publish + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + packages: write + strategy: + fail-fast: false + max-parallel: 3 + matrix: + flavour: + - "allspark-notebook" + - "datascience-notebook" + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Log in to GitHub Container Registry + id: login_ghcr + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and Push + id: build_and_push + uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0 + with: + context: ${{ matrix.flavour }} + file: ${{ matrix.flavour }}/Dockerfile + push: true + tags: ghcr.io/ministryofjustice/analytical-platform-${{ matrix.flavour }}:${{ github.ref_name }} diff --git a/allspark-notebook/Dockerfile b/allspark-notebook/Dockerfile index c90ffa9..2961154 100644 --- a/allspark-notebook/Dockerfile +++ b/allspark-notebook/Dockerfile @@ -1,57 +1,35 @@ -FROM jupyter/all-spark-notebook:spark-3.1.1@sha256:b73dad39ad5c469a92764e38d7cc4321040d3fedddcad7fcebc4ddc7f9c15ff2 +# lab-4.0.11 +FROM quay.io/jupyter/all-spark-notebook@sha256:a63b0faed54bc21d17a4691d8fae177dd95236e0adddbd9d43ee448dc2d5ba1e -LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk +LABEL org.opencontainers.image.vendor="Ministry of Justice" \ + org.opencontainers.image.authors="Analytical Platform" \ + org.opencontainers.image.title="Jupyter All Spark Notebook" \ + maintainer="analytics-platform-tech@digital.justice.gov.uk" -ENV PATH=$PATH:$HOME/.local/bin +ENV PATH="${PATH}:${HOME}/.local/bin" \ + CHOWN_HOME="no" \ + PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" -# Home directory contents is already owned by UID 1000 -ENV CHOWN_HOME=no - -# NB these are sensible defaults but may need to be changed programatically for -# non local spark (ie. EMR etc.) -ENV PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" - -# Container must be run as root to use NB_UID USER root -# Install OS pacakges -# -# The reason we have installed these has been lost. Including just in case. 
-# -# - gdal-bin -# - libspatialindex-dev -# - openssh-client -# -RUN apt-get update && \ - apt-get install -y \ - gdal-bin \ - libspatialindex-dev \ - openssh-client && \ - rm -rf /var/lib/apt/lists/* - -# I'm not sure this has any effect +RUN apt-get update --yes \ + && apt-get install --yes \ + gdal-bin \ + libspatialindex-dev \ + openssh-client \ + && apt-get clean --yes \ + && rm -rf /var/lib/apt/lists/* \ + && pip install --no-cache-dir --upgrade \ + pip \ + boto3 \ + nbstripout \ + s3fs==2023.12.2 \ + dataengineeringutils3==1.4.3 \ + etl-manager==7.6.0 \ + && conda install --yes \ + nbstripout \ + && nbstripout --install --system \ + && update-alternatives --set editor /bin/nano-tiny + +COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/add-user-to-group.sh COPY files/hdfs-site.xml /usr/local/spark/conf/hdfs-site.xml - -# add-user-to-group.sh adds the $NB_USER to group 50 (staff) used by RStudio -COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/ - -# Install python packages -# - pip - python package manager -# - boto3 - python AWS library -# - nbstripout - tool for stripping sensitive data out of notebooks -# -RUN pip install --upgrade \ - pip \ - boto3 \ - nbstripout \ - "s3fs<=0.4" \ - dataengineeringutils3==1.3.0 \ - etl-manager==7.4.0 - -RUN conda install --yes \ - 'nbstripout' - -RUN nbstripout --install --system - -# Vi just doesn't cut it for some people -RUN update-alternatives --set editor /bin/nano-tiny diff --git a/allspark-notebook/Dockerfile.tests b/allspark-notebook/Dockerfile.tests deleted file mode 100644 index 65c837f..0000000 --- a/allspark-notebook/Dockerfile.tests +++ /dev/null @@ -1,4 +0,0 @@ -FROM bash - -COPY tests /tests -COPY files /files diff --git a/allspark-notebook/Makefile b/allspark-notebook/Makefile deleted file mode 100644 index 810f421..0000000 --- a/allspark-notebook/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -SHELL = '/bin/bash' -export IMAGE_TAG ?= local -export BUILD_TAG ?= latest -export DOCKER_BUILDKIT=1 -export REPOSITORY=allspark-notebook -export REGISTRY?=mojanalytics -export NETWORK?=default -export CHEF_LICENSE=accept-no-persist - -.PHONY: build test pull push inspec up clean - -pull: - docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -build: - docker-compose build tests - docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} . - -push: - docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -test: clean up - echo Testing Container Version: ${IMAGE_TAG} - docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1 - -clean: - docker-compose down --volumes --remove-orphans - docker-compose --project-name ${REPOSITORY} down --remove-orphans - -up: - docker-compose --project-name ${REPOSITORY} up -d tests test diff --git a/allspark-notebook/docker-compose.yml b/allspark-notebook/docker-compose.yml deleted file mode 100644 index 60e5b67..0000000 --- a/allspark-notebook/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -version: "3.7" - -services: - tests: - build: - context: . 
- dockerfile: Dockerfile.tests - network: ${NETWORK:-default} - volumes: - - tests:/tests - test: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} - network_mode: ${NETWORK:-default} - command: - - "/usr/local/bin/start-notebook.sh" - - "--NotebookApp.token=''" - ports: [8888:8888] - environment: [JUPYTER_ENABLE_LAB=true] - volumes: - - tests:/share/tests - inspec: - image: chef/inspec:current - network_mode: ${NETWORK:-default} - environment: - CHEF_LICENSE: accept-no-persist - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - tests:/share/tests:ro - -volumes: - tests: diff --git a/allspark-notebook/files/pyspark-s3.py b/allspark-notebook/files/pyspark-s3.py deleted file mode 100644 index 74c7f9a..0000000 --- a/allspark-notebook/files/pyspark-s3.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python - -import pyspark -sc = pyspark.SparkContext("local[*]") - -hadoopConf = sc._jsc.hadoopConfiguration() -hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") diff --git a/allspark-notebook/test/container-structure-test.yml b/allspark-notebook/test/container-structure-test.yml new file mode 100644 index 0000000..63c37cc --- /dev/null +++ b/allspark-notebook/test/container-structure-test.yml @@ -0,0 +1,41 @@ +--- +schemaVersion: 2.0.0 + +containerRunOptions: + user: "jovyan" + +commandTests: + - name: "whoami" + command: "whoami" + expectedOutput: ["jovyan"] + + - name: "user" + command: "id" + args: ["--user", "jovyan"] + expectedOutput: ["1000"] + + - name: "groups" + command: "id" + args: ["--groups", "jovyan"] + expectedOutput: ["100"] + + - name: "conda" + command: "conda" + args: ["info"] + expectedOutput: [".*active environment.*"] + + - name: "nano" + command: "nano" + args: ["--version"] + expectedOutput: ["GNU nano.*"] + + - name: "pip" + command: "pip" + args: ["--version"] + expectedOutput: ["pip.*"] + + - name: "install osmnx" + command: "pip" + args: ["install", "osmnx"] + exitCode: 0 + expectedOutput: ["Successfully installed.*"] diff --git a/allspark-notebook/tests/controls/conda_spec.rb b/allspark-notebook/tests/controls/conda_spec.rb deleted file mode 100644 index c4fa936..0000000 --- a/allspark-notebook/tests/controls/conda_spec.rb +++ /dev/null @@ -1,15 +0,0 @@ -title 'Working Conda' - -control 'Conda available' do - impact 'high' - title 'Conda installer should be available to use' - desc 'The Conda installer is not preferred, but is the only way to install some packages.' 
- tag 'installer' - tag 'conda' - - describe command('conda info') do - its('exit_status') { should eq 0 } - its('stdout') { should match /conda/ } - end -end - diff --git a/allspark-notebook/tests/controls/nano_spec.rb b/allspark-notebook/tests/controls/nano_spec.rb deleted file mode 100644 index 1a974f0..0000000 --- a/allspark-notebook/tests/controls/nano_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -title 'nano is available and default editor' - -control 'nano available and default editor' do - impact 'low' - title 'nano is default editor' - desc 'nano is a simple editor, it should be the default one' - tag 'nano' - tag 'editor' - - describe command('nano --version') do - its('exit_status') { should eq 0 } - end - - describe command('editor --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /GNU nano, version/ } - end -end - diff --git a/allspark-notebook/tests/controls/pip_spec.rb b/allspark-notebook/tests/controls/pip_spec.rb deleted file mode 100644 index 81d46b4..0000000 --- a/allspark-notebook/tests/controls/pip_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -title 'Working Pip' - -control 'Pip available' do - impact 'high' - title 'Pip should be available to use' - desc 'Pip is the preferred installer for python packages.' - tag 'installer' - tag 'pip' - - describe command('pip --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /pip 2/ } - end -end - -control 'osmnx' do - impact 'high' - title 'Install osmnx' - desc 'Data scientists should be able top use osmnx' \ - ' OSMX is often installed, but difficult to install with system pacakges' \ - ' and rtree needing installing, which often break without lots of debugging.' - tag 'installer' - tag 'pip' - - describe command('pip install osmnx') do - its('exit_status') { should eq 0 } - its('stdout') { should match /Successfully installed/ } - end -end diff --git a/allspark-notebook/tests/controls/user_spec.rb b/allspark-notebook/tests/controls/user_spec.rb deleted file mode 100644 index feca701..0000000 --- a/allspark-notebook/tests/controls/user_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -title 'Jovyan User' - -control 'Common Users' do - impact 'high' - title 'The jovyan user should exist' - desc 'The joyvan user should exist and should have a UID of 1000' - tag 'user' - tag 'group' - - describe user('jovyan') do - it { should exist } - its('uid') { should eq 1000 } - end -end - -control 'Common Groups' do - impact 'high' - title 'The joyvan user should have the corect groups' - desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image' - - describe user('jovyan') do - its('gid') { should eq 100 } - its('groups') { should eq ['users', 'staff']} - end -end diff --git a/allspark-notebook/tests/files/pandas_read_homedir.py b/allspark-notebook/tests/files/pandas_read_homedir.py deleted file mode 100644 index c2f04c5..0000000 --- a/allspark-notebook/tests/files/pandas_read_homedir.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("path/to/home_dir") diff --git a/allspark-notebook/tests/files/pandas_read_s3.py b/allspark-notebook/tests/files/pandas_read_s3.py deleted file mode 100644 index 405e54d..0000000 --- a/allspark-notebook/tests/files/pandas_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("s3://blah") diff --git a/allspark-notebook/tests/files/spark_read_s3.py b/allspark-notebook/tests/files/spark_read_s3.py deleted file mode 100644 index f2171ee..0000000 --- 
a/allspark-notebook/tests/files/spark_read_s3.py +++ /dev/null @@ -1,8 +0,0 @@ -from pyspark.context import SparkContext -from pyspark.sql import SparkSession - -sc = SparkContext.getOrCreate() -spark = SparkSession(sc) - -df = spark.read.csv("s3a://bucket/path/to/file.csv") -df.limit(10).show() diff --git a/allspark-notebook/tests/inspec.lock b/allspark-notebook/tests/inspec.lock deleted file mode 100644 index e687b9b..0000000 --- a/allspark-notebook/tests/inspec.lock +++ /dev/null @@ -1,3 +0,0 @@ ---- -lockfile_version: 1 -depends: [] diff --git a/allspark-notebook/tests/inspec.yml b/allspark-notebook/tests/inspec.yml deleted file mode 100644 index b4fcaa6..0000000 --- a/allspark-notebook/tests/inspec.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: Allspark Notebook -title: Allspark Notebook -maintainer: Analytical Platform -copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice) -copyright_email: "analytics-platform-tech@digital.justice.gov.uk" -license: MIT -summary: An InSpec Compliance Profile for Analytical Platform Allspark Notebook -version: 0.1.0 diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index e962ee7..a262ee2 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -1,36 +1,34 @@ -FROM jupyter/datascience-notebook:lab-3.1.11 +# lab-4.0.11 +FROM quay.io/jupyter/datascience-notebook@sha256:76148e403aa44017f59b1dd0861d91daae800c7f86e9f39138b9d2703b885082 -LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk +LABEL org.opencontainers.image.vendor="Ministry of Justice" \ + org.opencontainers.image.authors="Analytical Platform" \ + org.opencontainers.image.title="Jupyter Datascience Notebook" \ + maintainer="analytics-platform-tech@digital.justice.gov.uk" -ENV PATH=$PATH:$HOME/.local/bin - -# Home directory contents is already owned by UID 1000 -ENV CHOWN_HOME=no +ENV PATH="${PATH}:${HOME}/.local/bin" \ + CHOWN_HOME="no" USER root -RUN apt-get update \ - && apt-get install -y \ - ca-certificates-java \ - openjdk-8-jdk \ - openssh-client \ - software-properties-common \ - gdal-bin \ - libspatialindex-dev \ - && rm -rf /var/lib/apt/lists/* - -RUN update-alternatives --set editor /bin/nano-tiny - -RUN pip install --upgrade \ - boto3 \ - black \ - nbstripout \ - rtree \ - "s3fs<=0.4" - -RUN pip install --upgrade jupyterlab-git - -RUN conda install --yes \ - 'nbstripout' - -RUN nbstripout --install --system +RUN apt-get update --yes \ + && apt-get install --yes \ + ca-certificates-java \ + openjdk-8-jdk \ + openssh-client \ + software-properties-common \ + gdal-bin \ + libspatialindex-dev \ + && apt-get clean --yes \ + && rm -rf /var/lib/apt/lists/* \ + && pip install --no-cache-dir --upgrade \ + boto3 \ + black \ + nbstripout \ + rtree \ + s3fs==2023.12.2 \ + jupyterlab-git \ + && conda install --yes \ + nbstripout \ + && nbstripout --install --system \ + && update-alternatives --set editor /bin/nano-tiny diff --git a/datascience-notebook/Dockerfile.tests b/datascience-notebook/Dockerfile.tests deleted file mode 100644 index 65c837f..0000000 --- a/datascience-notebook/Dockerfile.tests +++ /dev/null @@ -1,4 +0,0 @@ -FROM bash - -COPY tests /tests -COPY files /files diff --git a/datascience-notebook/Makefile b/datascience-notebook/Makefile deleted file mode 100644 index a8b8019..0000000 --- a/datascience-notebook/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -SHELL = '/bin/bash' -export IMAGE_TAG ?= local -export BUILD_TAG ?= latest -export DOCKER_BUILDKIT=1 -export REPOSITORY=datascience-notebook -export 
REGISTRY?=mojanalytics -export NETWORK?=default -export CHEF_LICENSE=accept-no-persist - -.PHONY: build test pull push inspec up clean - -pull: - docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -build: - docker-compose build tests - docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} . - -push: - docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -test: clean up - echo Testing Container Version: ${IMAGE_TAG} - docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1 - -clean: - docker-compose down --volumes --remove-orphans - docker-compose --project-name ${REPOSITORY} down --volumes - -up: - docker-compose --project-name ${REPOSITORY} up -d tests test - -enter: - docker-compose --project-name ${REPOSITORY} run test bash diff --git a/datascience-notebook/docker-compose.yml b/datascience-notebook/docker-compose.yml deleted file mode 100644 index 60e5b67..0000000 --- a/datascience-notebook/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -version: "3.7" - -services: - tests: - build: - context: . - dockerfile: Dockerfile.tests - network: ${NETWORK:-default} - volumes: - - tests:/tests - test: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} - network_mode: ${NETWORK:-default} - command: - - "/usr/local/bin/start-notebook.sh" - - "--NotebookApp.token=''" - ports: [8888:8888] - environment: [JUPYTER_ENABLE_LAB=true] - volumes: - - tests:/share/tests - inspec: - image: chef/inspec:current - network_mode: ${NETWORK:-default} - environment: - CHEF_LICENSE: accept-no-persist - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - tests:/share/tests:ro - -volumes: - tests: diff --git a/datascience-notebook/test/container-structure-test.yml b/datascience-notebook/test/container-structure-test.yml new file mode 100644 index 0000000..deb39e2 --- /dev/null +++ b/datascience-notebook/test/container-structure-test.yml @@ -0,0 +1,46 @@ +--- +schemaVersion: 2.0.0 + +containerRunOptions: + user: "jovyan" + +commandTests: + - name: "whoami" + command: "whoami" + expectedOutput: ["jovyan"] + + - name: "user" + command: "id" + args: ["--user", "jovyan"] + expectedOutput: ["1000"] + + - name: "groups" + command: "id" + args: ["--groups", "jovyan"] + expectedOutput: ["100"] + + - name: "conda" + command: "conda" + args: ["info"] + expectedOutput: [".*active environment.*"] + + - name: "juptyer-lab" + command: "jupyter-lab" + args: ["--version"] + expectedOutput: ["4.0.11"] + + - name: "nano" + command: "nano" + args: ["--version"] + expectedOutput: ["GNU nano.*"] + + - name: "pip" + command: "pip" + args: ["--version"] + expectedOutput: ["pip.*"] + + - name: "install osmnx" + command: "pip" + args: ["install", "osmnx"] + exitCode: 0 + expectedOutput: ["Successfully installed.*"] diff --git a/datascience-notebook/tests/controls/conda_spec.rb b/datascience-notebook/tests/controls/conda_spec.rb deleted file mode 100644 index c646439..0000000 --- a/datascience-notebook/tests/controls/conda_spec.rb +++ /dev/null @@ -1,14 +0,0 @@ -title 'Working Conda' - -control 'Conda available' do - impact 'high' - title 'Conda installer should be available to use' - desc 'The Conda installer is not preferred, but is the only way to install some packages.' 
- tag 'installer' - tag 'conda' - - describe command('conda info') do - its('exit_status') { should eq 0 } - its('stdout') { should match /conda/ } - end -end diff --git a/datascience-notebook/tests/controls/jupyter_lab_spec.rb b/datascience-notebook/tests/controls/jupyter_lab_spec.rb deleted file mode 100644 index 355a755..0000000 --- a/datascience-notebook/tests/controls/jupyter_lab_spec.rb +++ /dev/null @@ -1,13 +0,0 @@ -title 'Jupyter Lab' - -control 'JupyerLab version' do - impact 'high' - title 'JupyterLab verioon' - desc 'JupyerLab should be the correct verion' - tag 'JupyterLab' - - describe command('jupyter-lab --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /3.1.11/ } - end -end diff --git a/datascience-notebook/tests/controls/nano_spec.rb b/datascience-notebook/tests/controls/nano_spec.rb deleted file mode 100644 index 1a974f0..0000000 --- a/datascience-notebook/tests/controls/nano_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -title 'nano is available and default editor' - -control 'nano available and default editor' do - impact 'low' - title 'nano is default editor' - desc 'nano is a simple editor, it should be the default one' - tag 'nano' - tag 'editor' - - describe command('nano --version') do - its('exit_status') { should eq 0 } - end - - describe command('editor --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /GNU nano, version/ } - end -end - diff --git a/datascience-notebook/tests/controls/pip_spec.rb b/datascience-notebook/tests/controls/pip_spec.rb deleted file mode 100644 index 6878cea..0000000 --- a/datascience-notebook/tests/controls/pip_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -title 'Working Pip' - -control 'Pip available' do - impact 'high' - title 'Pip should be available to use' - desc 'Pip is the preferred installer for python packages.' - tag 'installer' - tag 'pip' - - describe command('pip --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /pip 21/ } - end -end - -control 'osmnx' do - impact 'high' - title 'Install osmnx' - desc 'Data scientists should be able top use osmnx' \ - ' OSMX is often installed, but difficult to install with system pacakges' \ - ' and rtree needing installing, which often break without lots of debugging.' 
- tag 'installer' - tag 'pip' - - describe command('pip install osmnx') do - its('exit_status') { should eq 0 } - its('stdout') { should match /Successfully installed/ } - end -end diff --git a/datascience-notebook/tests/controls/user_spec.rb b/datascience-notebook/tests/controls/user_spec.rb deleted file mode 100644 index ec293b6..0000000 --- a/datascience-notebook/tests/controls/user_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -title 'Jovyan User' - -control 'Common Users' do - impact 'high' - title 'The jovyan user should exist' - desc 'The joyvan user should exist This makes sure that it has the same UID & GID as the rstudio images' - tag 'user' - tag 'group' - - describe user('jovyan') do - it { should exist } - its('uid') { should eq 1000 } - end -end - -#control 'Common Groups' do -# impact 'high' -# title 'The joyvan user should have the corect groups' -# desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image' -# -# describe user('jovyan') do -# its('gid') { should eq 100 } -# its('groups') { should eq ['users', 'staff']} -# end -#end diff --git a/datascience-notebook/tests/files/pandas_read_homedir.py b/datascience-notebook/tests/files/pandas_read_homedir.py deleted file mode 100644 index c2f04c5..0000000 --- a/datascience-notebook/tests/files/pandas_read_homedir.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("path/to/home_dir") diff --git a/datascience-notebook/tests/files/pandas_read_s3.py b/datascience-notebook/tests/files/pandas_read_s3.py deleted file mode 100644 index 405e54d..0000000 --- a/datascience-notebook/tests/files/pandas_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("s3://blah") diff --git a/datascience-notebook/tests/files/spark_read_s3.py b/datascience-notebook/tests/files/spark_read_s3.py deleted file mode 100644 index 25e026a..0000000 --- a/datascience-notebook/tests/files/spark_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ - - -spark.read_parquet() diff --git a/datascience-notebook/tests/inspec.lock b/datascience-notebook/tests/inspec.lock deleted file mode 100644 index e687b9b..0000000 --- a/datascience-notebook/tests/inspec.lock +++ /dev/null @@ -1,3 +0,0 @@ ---- -lockfile_version: 1 -depends: [] diff --git a/datascience-notebook/tests/inspec.yml b/datascience-notebook/tests/inspec.yml deleted file mode 100644 index 0256e1c..0000000 --- a/datascience-notebook/tests/inspec.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: Datascience Notebook -title: Datascience Notebook -maintainer: Analytical Platform -copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice) -copyright_email: "analytics-platform-tech@digital.justice.gov.uk" -license: MIT -summary: An InSpec Compliance Profile for Analytical Platform Jupyter Datascience Notebook -version: 0.1.0 diff --git a/files/hdfs-site.xml b/files/hdfs-site.xml deleted file mode 100644 index 617f159..0000000 --- a/files/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - fs.s3a.impl - org.apache.hadoop.fs.s3a.S3AFileSystem - - diff --git a/files/pyspark-s3.py b/files/pyspark-s3.py deleted file mode 100644 index da6f313..0000000 --- a/files/pyspark-s3.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python - -import os -os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1 pyspark-shell' - -import pyspark -sc = pyspark.SparkContext("local[*]") - -from pyspark.sql import SQLContext -sqlContext = SQLContext(sc) - -hadoopConf = 
sc._jsc.hadoopConfiguration() -hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") diff --git a/oracle-datascience-notebook/Dockerfile b/oracle-datascience-notebook/Dockerfile deleted file mode 100644 index eb79c8a..0000000 --- a/oracle-datascience-notebook/Dockerfile +++ /dev/null @@ -1,60 +0,0 @@ -FROM jupyter/datascience-notebook:76402a27fd13 - -LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk - -USER root - -ENV PATH=$PATH:$HOME/.local/bin \ - CHOWN_HOME=no \ - ORACLE_HOME=/opt/oracle/instantclient_19_6 \ - LD_LIBRARY_PATH=$ORACLE_HOME:$LD_LIBRARY_PATH \ - PATH=$PATH:$ORACLE_HOME \ - INSTANT_CLIENT_VERSION="19.6.0.0" - -RUN apt-get update \ - && apt-get install -y \ - ca-certificates-java \ - openjdk-8-jdk \ - openssh-client \ - software-properties-common \ - gdal-bin \ - libspatialindex-dev \ - libaio1 \ - && rm -rf /var/lib/apt/lists/* - -RUN update-alternatives --set editor /bin/nano \ - && usermod -a -G "staff,users" "${NB_USER}" - -# Install Oracle Instant Client and SQL*Plus -# See: https://www.oracle.com/uk/database/technologies/instant-client/linux-x86-64-downloads.html#ic_x64_inst -RUN mkdir /opt/oracle \ - && cd /opt/oracle \ - && curl -sO https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && curl -sO https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && unzip instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && unzip instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip \ - && rm instantclient-basiclite-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip instantclient-sqlplus-linux.x64-${INSTANT_CLIENT_VERSION}.0dbru.zip - -RUN sh -c "echo /opt/oracle/instantclient_19_6 > /etc/ld.so.conf.d/oracle-instantclient.conf" \ - && ldconfig - -USER $NB_USER -RUN pip install --upgrade \ - boto3 \ - black \ - nbstripout \ - rtree \ - s3fs - -ENV JUPYTERLAB_DIR="/home/jovyan/.jupyter" -RUN jupyter labextension install \ - @jupyter-widgets/jupyterlab-manager \ - @jupyterlab/github \ - @jupyterlab/git - -RUN pip install flake8 \ - && jupyter labextension install jupyterlab-flake8 - -RUN pip install jupyterlab_code_formatter==1.3.8 \ - && jupyter labextension install @ryantam626/jupyterlab_code_formatter@v1.3.8 \ - && jupyter serverextension enable --user --py jupyterlab_code_formatter diff --git a/oracle-datascience-notebook/Dockerfile.tests b/oracle-datascience-notebook/Dockerfile.tests deleted file mode 100644 index 65c837f..0000000 --- a/oracle-datascience-notebook/Dockerfile.tests +++ /dev/null @@ -1,4 +0,0 @@ -FROM bash - -COPY tests /tests -COPY files /files diff --git a/oracle-datascience-notebook/Makefile b/oracle-datascience-notebook/Makefile deleted file mode 100644 index 73589b5..0000000 --- a/oracle-datascience-notebook/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -SHELL = '/bin/bash' -export IMAGE_TAG ?= local -export BUILD_TAG ?= latest -export DOCKER_BUILDKIT=1 -export REPOSITORY=oracle-datascience-notebook -export REGISTRY?=mojanalytics -export NETWORK?=default -export CHEF_LICENSE=accept-no-persist - -.PHONY: build test pull push inspec up clean - -pull: - docker pull ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -build: - docker-compose build tests - docker build --network=${NETWORK} -t ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} . 
- -push: - docker push ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG} - -test: clean up - echo Testing Container Version: ${IMAGE_TAG} - docker-compose --project-name ${REPOSITORY} run --rm inspec exec tests -t docker://${REPOSITORY}_test_1 - -clean: - docker-compose down --volumes --remove-orphans - docker-compose --project-name ${REPOSITORY} down --volumes - -up: - docker-compose --project-name ${REPOSITORY} up -d tests test diff --git a/oracle-datascience-notebook/docker-compose.yml b/oracle-datascience-notebook/docker-compose.yml deleted file mode 100644 index 60e5b67..0000000 --- a/oracle-datascience-notebook/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -version: "3.7" - -services: - tests: - build: - context: . - dockerfile: Dockerfile.tests - network: ${NETWORK:-default} - volumes: - - tests:/tests - test: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} - network_mode: ${NETWORK:-default} - command: - - "/usr/local/bin/start-notebook.sh" - - "--NotebookApp.token=''" - ports: [8888:8888] - environment: [JUPYTER_ENABLE_LAB=true] - volumes: - - tests:/share/tests - inspec: - image: chef/inspec:current - network_mode: ${NETWORK:-default} - environment: - CHEF_LICENSE: accept-no-persist - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - tests:/share/tests:ro - -volumes: - tests: diff --git a/oracle-datascience-notebook/files/hdfs-site.xml b/oracle-datascience-notebook/files/hdfs-site.xml deleted file mode 100644 index 617f159..0000000 --- a/oracle-datascience-notebook/files/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - fs.s3a.impl - org.apache.hadoop.fs.s3a.S3AFileSystem - - diff --git a/oracle-datascience-notebook/files/pyspark-s3.py b/oracle-datascience-notebook/files/pyspark-s3.py deleted file mode 100644 index da6f313..0000000 --- a/oracle-datascience-notebook/files/pyspark-s3.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python - -import os -os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1 pyspark-shell' - -import pyspark -sc = pyspark.SparkContext("local[*]") - -from pyspark.sql import SQLContext -sqlContext = SQLContext(sc) - -hadoopConf = sc._jsc.hadoopConfiguration() -hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") diff --git a/oracle-datascience-notebook/tests/controls/conda_spec.rb b/oracle-datascience-notebook/tests/controls/conda_spec.rb deleted file mode 100644 index c4fa936..0000000 --- a/oracle-datascience-notebook/tests/controls/conda_spec.rb +++ /dev/null @@ -1,15 +0,0 @@ -title 'Working Conda' - -control 'Conda available' do - impact 'high' - title 'Conda installer should be available to use' - desc 'The Conda installer is not preferred, but is the only way to install some packages.' 
- tag 'installer' - tag 'conda' - - describe command('conda info') do - its('exit_status') { should eq 0 } - its('stdout') { should match /conda/ } - end -end - diff --git a/oracle-datascience-notebook/tests/controls/nano_spec.rb b/oracle-datascience-notebook/tests/controls/nano_spec.rb deleted file mode 100644 index 1a974f0..0000000 --- a/oracle-datascience-notebook/tests/controls/nano_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -title 'nano is available and default editor' - -control 'nano available and default editor' do - impact 'low' - title 'nano is default editor' - desc 'nano is a simple editor, it should be the default one' - tag 'nano' - tag 'editor' - - describe command('nano --version') do - its('exit_status') { should eq 0 } - end - - describe command('editor --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /GNU nano, version/ } - end -end - diff --git a/oracle-datascience-notebook/tests/controls/pip_spec.rb b/oracle-datascience-notebook/tests/controls/pip_spec.rb deleted file mode 100644 index eb31df1..0000000 --- a/oracle-datascience-notebook/tests/controls/pip_spec.rb +++ /dev/null @@ -1,14 +0,0 @@ -title 'Working Pip' - -control 'Pip available' do - impact 'high' - title 'Pip should be available to use' - desc 'Pip is the preferred installer for python packages.' - tag 'installer' - tag 'pip' - - describe command('pip --version') do - its('exit_status') { should eq 0 } - its('stdout') { should match /pip 20/ } - end -end diff --git a/oracle-datascience-notebook/tests/controls/user_spec.rb b/oracle-datascience-notebook/tests/controls/user_spec.rb deleted file mode 100644 index f3a8555..0000000 --- a/oracle-datascience-notebook/tests/controls/user_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -title 'Jovyan User' - -control 'Common Users' do - impact 'high' - title 'The jovyan user should exist' - desc 'The joyvan user should exist This makes sure that it has the same UID & GID as the rstudio images' - tag 'user' - tag 'group' - - describe user('jovyan') do - it { should exist } - its('uid') { should eq 1000 } - end -end - -control 'Common Groups' do - impact 'high' - title 'The joyvan user should have the corect groups' - desc 'joyvan should have the primary group of users and also be in the staff group to match RStudio, but not break this image' - - describe user('jovyan') do - its('gid') { should eq 100 } - its('groups') { should eq ['users', 'staff']} - end -end diff --git a/oracle-datascience-notebook/tests/files/pandas_read_homedir.py b/oracle-datascience-notebook/tests/files/pandas_read_homedir.py deleted file mode 100644 index c2f04c5..0000000 --- a/oracle-datascience-notebook/tests/files/pandas_read_homedir.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("path/to/home_dir") diff --git a/oracle-datascience-notebook/tests/files/pandas_read_s3.py b/oracle-datascience-notebook/tests/files/pandas_read_s3.py deleted file mode 100644 index 405e54d..0000000 --- a/oracle-datascience-notebook/tests/files/pandas_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd - -pd.read_csv("s3://blah") diff --git a/oracle-datascience-notebook/tests/files/spark_read_s3.py b/oracle-datascience-notebook/tests/files/spark_read_s3.py deleted file mode 100644 index 25e026a..0000000 --- a/oracle-datascience-notebook/tests/files/spark_read_s3.py +++ /dev/null @@ -1,3 +0,0 @@ - - -spark.read_parquet() diff --git a/oracle-datascience-notebook/tests/inspec.lock b/oracle-datascience-notebook/tests/inspec.lock deleted file mode 100644 index 
e687b9b..0000000 --- a/oracle-datascience-notebook/tests/inspec.lock +++ /dev/null @@ -1,3 +0,0 @@ ---- -lockfile_version: 1 -depends: [] diff --git a/oracle-datascience-notebook/tests/inspec.yml b/oracle-datascience-notebook/tests/inspec.yml deleted file mode 100644 index 1bd8f79..0000000 --- a/oracle-datascience-notebook/tests/inspec.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: Oracle Datascience Notebook -title: Oracle Datascience Notebook -maintainer: Analytical Platform -copyright: Copyright (c) 2020 Crown Copyright (Ministry of Justice) -copyright_email: "analytics-platform-tech@digital.justice.gov.uk" -license: MIT -summary: An InSpec Compliance Profile for Analytical Platform Jupyter Datascience Notebook -version: 0.1.0 diff --git a/scripts/build-and-test.sh b/scripts/build-and-test.sh new file mode 100644 index 0000000..f0f9db7 --- /dev/null +++ b/scripts/build-and-test.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +IMAGE="${1}" +IMAGE_TAG="analytical-platform.service.justice.gov.uk/${IMAGE}:local" +CONTAINER_STRUCTURE_TEST_IMAGE="gcr.io/gcp-runtimes/container-structure-test:latest" + +if [[ "${REMOTE_CONTAINERS}" ]] && [[ "$(uname -m)" == "aarch64" ]]; then + echo "(⚠) Looks like you're running in a dev container on Apple Silicon." + echo "(⚠) This script builds linux/amd64 images which might take a long time or even fail." + export PLATFORM_FLAG="--platform linux/amd64" +fi + +echo "Building [ ${IMAGE} ] as [ ${IMAGE_TAG} ]" + +docker build ${PLATFORM_FLAG} --file "${IMAGE}/Dockerfile" --tag "${IMAGE_TAG}" "${IMAGE}" + +if [[ -f "${IMAGE}/test/container-structure-test.yml" ]]; then + echo "Running container structure test for [ ${IMAGE_TAG} ]" + + docker run --rm ${PLATFORM_FLAG} \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + --volume "${PWD}:/workspace" \ + --workdir /workspace \ + "${CONTAINER_STRUCTURE_TEST_IMAGE}" \ + test --image "${IMAGE_TAG}" --config "/workspace/${IMAGE}/test/container-structure-test.yml" +fi diff --git a/datascience-notebook/files/hdfs-site.xml b/src/files/hdfs-site.xml similarity index 100% rename from datascience-notebook/files/hdfs-site.xml rename to src/files/hdfs-site.xml diff --git a/datascience-notebook/files/pyspark-s3.py b/src/files/pyspark-s3.py similarity index 100% rename from datascience-notebook/files/pyspark-s3.py rename to src/files/pyspark-s3.py
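For local testing, the new helper script introduced by this patch can be invoked directly; a minimal sketch, assuming Docker is available and the command is run from the repository root (the flavour name below is just one of the two entries in the build matrix, allspark-notebook works the same way):

    # Build the image tagged analytical-platform.service.justice.gov.uk/<flavour>:local
    # and, if <flavour>/test/container-structure-test.yml exists, run container-structure-test against it
    bash scripts/build-and-test.sh datascience-notebook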