-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Jacob Woffenden <jacob.woffenden@digital.justice.gov.uk>
- Loading branch information
1 parent
6c69339
commit 26a21c0
Showing
53 changed files
with
279 additions
and
828 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
--- | ||
name: Test and Build | ||
|
||
on: | ||
pull_request: | ||
branches: | ||
- main | ||
|
||
permissions: {} # yamllint disable-line | ||
|
||
jobs: | ||
yamllint: | ||
name: YAML Lint | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: read | ||
steps: | ||
- name: Checkout | ||
id: checkout | ||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | ||
|
||
- name: Run yamllint | ||
id: run_yamllint | ||
uses: actionshub/yamllint@b772a30c3ba90c5f5aadfe94d8f3599e3a7099c8 # v1.8.2 | ||
|
||
markdownlint: | ||
name: Markdown Lint | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: read | ||
steps: | ||
- name: Checkout | ||
id: checkout | ||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | ||
|
||
- name: Run mdl | ||
id: run_mdl | ||
uses: actionshub/markdownlint@6c82ff529253530dfbf75c37570876c52692835f # v3.1.4 | ||
|
||
build-and-test: | ||
if: github.ref != 'main' | ||
name: Build and Test | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: read | ||
strategy: | ||
fail-fast: false | ||
max-parallel: 3 | ||
matrix: | ||
flavour: | ||
- "allspark-notebook" | ||
- "datascience-notebook" | ||
steps: | ||
- name: Checkout | ||
id: checkout | ||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | ||
|
||
- name: Build and Test | ||
id: build_and_test | ||
shell: bash | ||
run: | | ||
bash scripts/build-and-test.sh "${{ matrix.flavour }}" |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
--- | ||
name: Publish | ||
|
||
on: | ||
push: | ||
tags: | ||
- "v*" | ||
|
||
permissions: {} # yamllint disable-line | ||
|
||
jobs: | ||
publish: | ||
name: Publish | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: read | ||
id-token: write | ||
packages: write | ||
strategy: | ||
fail-fast: false | ||
max-parallel: 3 | ||
matrix: | ||
flavour: | ||
- "allspark-notebook" | ||
- "datascience-notebook" | ||
steps: | ||
- name: Checkout | ||
id: checkout | ||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | ||
|
||
- name: Log in to GitHub Container Registry | ||
id: login_ghcr | ||
uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 | ||
with: | ||
registry: ghcr.io | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
- name: Build and Push | ||
id: build_and_push | ||
uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0 | ||
with: | ||
context: ${{ matrix.flavour }} | ||
file: ${{ matrix.flavour }}/Dockerfile | ||
push: true | ||
tags: ghcr.io/ministryofjustice/analytical-platform-${{ matrix.flavour }}:${{ github.ref_name }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1,35 @@ | ||
FROM jupyter/all-spark-notebook:spark-3.1.1@sha256:b73dad39ad5c469a92764e38d7cc4321040d3fedddcad7fcebc4ddc7f9c15ff2 | ||
# lab-4.0.11 | ||
FROM quay.io/jupyter/all-spark-notebook@sha256:a63b0faed54bc21d17a4691d8fae177dd95236e0adddbd9d43ee448dc2d5ba1e | ||
|
||
LABEL maintainer=analytics-platform-tech@digital.justice.gov.uk | ||
LABEL org.opencontainers.image.vendor="Ministry of Justice" \ | ||
org.opencontainers.image.authors="Analytical Platform" \ | ||
org.opencontainers.image.title="Jupyter All Spark Notebook" \ | ||
maintainer="analytics-platform-tech@digital.justice.gov.uk" | ||
|
||
ENV PATH=$PATH:$HOME/.local/bin | ||
ENV PATH="${PATH}:${HOME}/.local/bin" \ | ||
CHOWN_HOME="no" \ | ||
PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" | ||
|
||
# Home directory contents are already owned by UID 1000 | ||
ENV CHOWN_HOME=no | ||
|
||
# NB these are sensible defaults but may need to be changed programmatically for | ||
# non-local Spark (i.e. EMR etc.) | ||
ENV PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.12.134,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" | ||
|
||
# Container must be run as root to use NB_UID | ||
USER root | ||
|
||
# Install OS packages | ||
# | ||
# The reason we have installed these has been lost. Including just in case. | ||
# | ||
# - gdal-bin | ||
# - libspatialindex-dev | ||
# - openssh-client | ||
# | ||
RUN apt-get update && \ | ||
apt-get install -y \ | ||
gdal-bin \ | ||
libspatialindex-dev \ | ||
openssh-client && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# I'm not sure this has any effect | ||
RUN apt-get update --yes \ | ||
&& apt-get install --yes \ | ||
gdal-bin \ | ||
libspatialindex-dev \ | ||
openssh-client \ | ||
&& apt-get clean --yes \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& pip install --no-cache-dir --upgrade \ | ||
pip \ | ||
boto3 \ | ||
nbstripout \ | ||
s3fs==2023.12.2 \ | ||
dataengineeringutils3==1.4.3 \ | ||
etl-manager==7.6.0 \ | ||
&& conda install --yes \ | ||
nbstripout \ | ||
&& nbstripout --install --system \ | ||
&& update-alternatives --set editor /bin/nano-tiny | ||
|
||
COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/add-user-to-group.sh | ||
COPY files/hdfs-site.xml /usr/local/spark/conf/hdfs-site.xml | ||
|
||
# add-user-to-group.sh adds the $NB_USER to group 50 (staff) used by RStudio | ||
COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/ | ||
|
||
# Install python packages | ||
# - pip - python package manager | ||
# - boto3 - python AWS library | ||
# - nbstripout - tool for stripping sensitive data out of notebooks | ||
# | ||
RUN pip install --upgrade \ | ||
pip \ | ||
boto3 \ | ||
nbstripout \ | ||
"s3fs<=0.4" \ | ||
dataengineeringutils3==1.3.0 \ | ||
etl-manager==7.4.0 | ||
|
||
RUN conda install --yes \ | ||
'nbstripout' | ||
|
||
RUN nbstripout --install --system | ||
|
||
# Vi just doesn't cut it for some people | ||
RUN update-alternatives --set editor /bin/nano-tiny |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.