From 1d5f5215250e1de1d3665772ef28ff6ff8a4f7ed Mon Sep 17 00:00:00 2001 From: <> Date: Fri, 1 Sep 2023 12:50:23 +0000 Subject: [PATCH] Deployed ba10eb9 with MkDocs version: 1.4.2 --- .nojekyll | 0 404.html | 933 +++ CNAME | 1 + architecture/ADRs/0001/index.html | 1087 +++ architecture/ADRs/0002/index.html | 1092 +++ architecture/ADRs/index.html | 969 +++ architecture/application-lifecycle/index.html | 988 +++ .../comparison-with-jupyterhub/index.html | 990 +++ architecture/components/index.html | 1115 +++ assets/data-workspace-architecture.png | Bin 0 -> 459963 bytes assets/dit-favicon.png | Bin 0 -> 12248 bytes assets/dit-logo.png | Bin 0 -> 12764 bytes assets/dw-readme-front-page.png | Bin 0 -> 63956 bytes assets/images/favicon.png | Bin 0 -> 1870 bytes assets/images/govuk-crest-2x.png | Bin 0 -> 8884 bytes assets/images/ogl.png | Bin 0 -> 1398 bytes assets/javascripts/bundle.407015b8.min.js | 29 + assets/javascripts/bundle.407015b8.min.js.map | 8 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + .../lunr/min/lunr.stemmer.support.min.js | 1 + 
assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.208ed371.min.js | 42 + .../workers/search.208ed371.min.js.map | 8 + assets/stylesheets/extra.css | 185 + assets/stylesheets/main.7a7fce14.min.css | 1 + assets/stylesheets/main.7a7fce14.min.css.map | 1 + assets/stylesheets/palette.a0c5b2b5.min.css | 1 + .../stylesheets/palette.a0c5b2b5.min.css.map | 1 + contributing/index.html | 1109 +++ data-ingestion/index.html | 1056 +++ deployment/aws/index.html | 1097 +++ deployment/other-platforms/index.html | 973 +++ development/assets/pycharm-breakpoint.png | Bin 0 -> 18187 bytes development/assets/pycharm-debug-ouput.png | Bin 0 -> 91686 bytes .../assets/pycharm-remote-interpreter.png | Bin 0 -> 79144 bytes development/assets/pycharm-start-debugger.png | Bin 0 -> 35877 bytes development/assets/remote-debug-server.png | Bin 0 -> 50341 bytes development/assets/vscode-debugger-output.png | Bin 0 -> 79440 bytes development/assets/vscode-run-debug.png | Bin 0 -> 63266 bytes development/database-migrations/index.html | 1022 +++ development/enhancedtables/index.html | 1079 +++ development/remotedebugging/index.html | 1101 +++ development/running-locally/index.html | 1136 +++ development/running-tests/index.html | 1056 +++ development/updating-dependencies/index.html | 967 +++ favicons/moj.ico | Bin 0 -> 15086 bytes index.html | 1032 +++ logos/moj.png | Bin 0 -> 3653 bytes search/search_index.json | 1 + sitemap.xml | 93 + sitemap.xml.gz | Bin 0 -> 208 bytes stylesheets/extra.css | 54 + stylesheets/tags-color.css | 31 + 78 files changed, 26453 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create 
mode 100644 CNAME create mode 100644 architecture/ADRs/0001/index.html create mode 100644 architecture/ADRs/0002/index.html create mode 100644 architecture/ADRs/index.html create mode 100644 architecture/application-lifecycle/index.html create mode 100644 architecture/comparison-with-jupyterhub/index.html create mode 100644 architecture/components/index.html create mode 100644 assets/data-workspace-architecture.png create mode 100644 assets/dit-favicon.png create mode 100644 assets/dit-logo.png create mode 100644 assets/dw-readme-front-page.png create mode 100644 assets/images/favicon.png create mode 100644 assets/images/govuk-crest-2x.png create mode 100644 assets/images/ogl.png create mode 100644 assets/javascripts/bundle.407015b8.min.js create mode 100644 assets/javascripts/bundle.407015b8.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js 
create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.208ed371.min.js create mode 100644 assets/javascripts/workers/search.208ed371.min.js.map create mode 100644 assets/stylesheets/extra.css create mode 100644 assets/stylesheets/main.7a7fce14.min.css create mode 100644 assets/stylesheets/main.7a7fce14.min.css.map create mode 100644 assets/stylesheets/palette.a0c5b2b5.min.css create mode 100644 assets/stylesheets/palette.a0c5b2b5.min.css.map create mode 100644 contributing/index.html create mode 100644 data-ingestion/index.html create mode 100644 deployment/aws/index.html create mode 100644 deployment/other-platforms/index.html create mode 100644 development/assets/pycharm-breakpoint.png create mode 100644 development/assets/pycharm-debug-ouput.png create mode 100644 development/assets/pycharm-remote-interpreter.png create mode 100644 development/assets/pycharm-start-debugger.png create mode 100644 development/assets/remote-debug-server.png create mode 100644 development/assets/vscode-debugger-output.png create mode 100644 development/assets/vscode-run-debug.png create mode 100644 development/database-migrations/index.html create mode 100644 development/enhancedtables/index.html create mode 100644 development/remotedebugging/index.html create mode 100644 development/running-locally/index.html create mode 100644 development/running-tests/index.html create mode 100644 development/updating-dependencies/index.html create mode 
100644 favicons/moj.ico create mode 100644 index.html create mode 100644 logos/moj.png create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 stylesheets/extra.css create mode 100644 stylesheets/tags-color.css diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/404.html b/404.html new file mode 100644 index 0000000000..db22fcb022 --- /dev/null +++ b/404.html @@ -0,0 +1,933 @@ + + + +
+ + + + + + + + + + + + + +A common question is why not just NGINX instead of the custom proxy? The reason is the dynamic routing for the applications, e.g. URLs like https://jupyterlab-abcde1234.mydomain.com/some/path: each one has a lot of fairly complex requirements.
+While not impossible to leverage NGINX to move some code from the proxy, there would still need to be custom code, and NGINX would have to communicate via some mechanism to this custom code to achieve all of the above: extra HTTP or Redis requests, or maybe through a custom NGINX module. It is suspected that this will make things more complex rather than less, and increase the burden on the developer.
+We will use a custom proxy for Data Workspace, rather than simply using NGINX.
+This will decrease the burden on the developer that would have been required by custom NGINX modules, extra HTTP or Redis requests, which all would still have required custom code.
+Using the custom proxy allows for all of the complex requirements and dynamic routing of our applications over which we have absolute control.
+Initial difficulty when onboarding new team members as they will need to understand these decisions and requirements.
+There is an extra network hop compared to not having a proxy.
+The proxy fits the typical use-case of event-loop based programming: low CPU but high IO requirements, with a potentially high number of connections.
+The asyncio library aiohttp provides enough low-level control over the headers and the bytes of requests and responses to work as a controllable proxy. For example, the typical HTTP request cycle can be programmed fairly explicitly.
+An incoming request begins: its headers are received.
+The library also allows for receiving and making WebSockets requests. This is done without knowing ahead of time which path is WebSockets and which is HTTP. This is something that doesn't seem possible with, for example, Django Channels.
+Requests and responses can be of the order of several GBs, so this streaming behaviour is a critical requirement.
+We will use the asyncio library aiohttp.
+Allows for critical requirement of streaming behaviour.
+We can stream HTTP(S) and WebSockets requests in an efficient way with one cohesive Python package.
+A core bit of infrastructure will depend on a flavour of Python unknown to even experienced Python developers.
+Aiohttp is unable to proxy things that are not HTTP or WebSockets, e.g. SSH. This is why GitLab isn't behind the proxy.
+This section contains a list of Architecture Decision Records (ADRs).
+As an example, from the point of view of user abcde1234
, https://jupyterlab-abcde1234.mydomain.com/
is the fixed address of their private JupyterLab application. Going to https://jupyterlab-abcde1234.mydomain.com/
in a browser will:
If the application is stopped, then a visit to https://jupyterlab-abcde1234.mydomain.com/
will repeat the process. The user will never leave https://jupyterlab-abcde1234.mydomain.com/
. If the user visits https://jupyterlab-abcde1234.mydomain.com/some/path
, they will also remain at https://jupyterlab-abcde1234.mydomain.com/some/path
to ensure, for example, bookmarks to any in-application page work even if they need to start the application to view them.
The browser will only make GET requests during the start of an application. While potentially a small abuse of HTTP, it allows the straightforward behaviour described: no HTML form or JavaScript is required to start an application (although JavaScript is used to show a countdown to the user and to check if an application has loaded), and the GET requests are idempotent.
+The proxy however, has a more complex behaviour. On an incoming request from the browser for https://jupyterlab-abcde1234.mydomain.com/
:
GET
details of an application with the host jupyterlab-abcde1234
from an internal API of the main application;GET
returns a 404, it will make a PUT
request to the main application that initiates creation of the Fargate task;GET
returns a 200, and the details contain a URL, the proxy will attempt to proxy the incoming request to it;SPAWNING
application as a true error: they are effectively swallowed.GET
as STOPPED
, which happens on error, it will DELETE
the application, and show an error to the user.The proxy itself only responds to incoming requests from the browser, and has no long-lived tasks that go beyond one HTTP request or WebSockets connection. This ensures it can be horizontally scaled.
+ + + + + + +In addition to being able to run any Docker container, not just JupyterLab, Data Workspace has some deliberate architectural features that are different to JupyterHub.
+All state is in the database, accessed by the main Django application.
+Specifically, no state is kept in the memory of the main Django application. This means it can be horizontally scaled without issue.
+The proxy is also stateless: it fetches how to route requests from the main application, which itself fetches the data from the database. This means it can also be horizontally scaled without issue, and potentially independently from the main application. This means sticky sessions are not needed, and multiple users could access the same application, which is a planned feature for user-supplied visualisation applications.
+Authentication is completely handled by the proxy. Apart from specific exceptions like the healthcheck, non-authenticated requests do not reach the main application.
+The launched containers do not make requests to the main application, and the main application does not make requests to the launched containers. This means there are fewer cyclic dependencies in terms of data flow, and that applications don't need to be customised for this environment. They just need to open a port for HTTP requests, which makes them extremely standard web-based Docker applications.
+There is a notable exception to the statelessness of the main application: the launch of an application is made of a sequence of calls to AWS, and is done in a Celery task. If this sequence is interrupted, the launch of the application will fail. This is a solvable problem: the state could be saved into the database and the sequence resumed later. However, since this sequence of calls lasts only a few seconds, and the user will be told of the error and can refresh to try to launch the application again, at this stage of the project this has been deemed unnecessary.
+ + + + + + +Data Workspace is made of a number of components. This page explains what those are and how they work together.
+To understand the components of Data Workspace's architecture, you should have familiarity with:
+At the highest level, users access the Data Workspace application, which accesses a PostgreSQL database.
+graph
+ A[User] --> B[Data Workspace]
+ B --> C["PostgreSQL (Aurora)"]
+The architecture is heavily Docker/ECS Fargate based.
+graph
+ A[User] -->|Staff SSO| B[Amazon Quicksight];
+ B --> C["PostgreSQL (Aurora)"];
+ A --> |Staff SSO|F["'The Proxy' (aiohttp)"];
+ F --> |rstudio-9c57e86a|G[Per-user and shared tools];
+ F --> H[Shiny, Flask, Django, NGINX];
+ F --> I[Django, Data Explorer];
+ G --> C;
+ H --> C;
+ I --> C;
+
+
+
+Main application: + A Django application to manage datasets and permissions, launch containers, a proxy to route requests to those containers, and an NGINX instance to route to the proxy and serve static files.
+JupyterLab: + Launched by users of the main application, and populated with credentials in the environment to access certain datasets.
+rStudio: + Launched by users of the main application, and populated with credentials in the environment to access certain datasets.
+pgAdmin: + Launched by users of the main application, and populated with credentials in the environment to access certain datasets.
+File browser: + A single-page-application that offers upload and download of files to/from each user's folder in S3. The data is transferred directly between the user's browser and S3.
+metrics: + A sidecar-container for the user-launched containers that exposes metrics from the ECS task metadata endpoint in Prometheus format.
+s3sync: + A sidecar-container for the user-launched containers that syncs to and from S3 using mobius3. This is to allow file-persistence on S3 without using FUSE, which at the time of writing is not possible on Fargate.
+dns-rewrite-proxy: + The DNS server of the VPC that launched containers run in. It selectively allows only certain DNS requests through to mitigate the chance of data exfiltration through DNS. When this container is deployed, it changes DHCP settings in the VPC, and will most likely break aspects of user-launched containers.
+healthcheck: + Proxies through to the healthcheck endpoint of the main application, so the main application can be in a security group locked-down to certain IP addresses, but still be monitored by Pingdom.
+mirrors-sync: + Mirrors pypi, CRAN and (ana)conda repositories to S3, so user-launched JupyterLab and rStudio containers can install packages without having to contact the public internet.
+prometheus: + Collects metrics from user-launched containers and re-exposes them through federation.
+registry: + A Docker pull-through-cache to repositories in quay.io. This allows the VPC to not have public internet access but still launch containers from quay.io in Fargate.
+sentryproxy: + Proxies errors to a Sentry instance: only used by JupyterLab.
+1}QK^&f3rLI-
z@Dofd#b8|pkVZ;{NMHgeWkceK&+C`)1aXEAD=Bwe=MULjESQKP6w|{HqNRiZ#!9*M
zVY@*bB%^uKsYkS4os^V#EKn|wW)iGk n-NM_WdMnWc%X27;CrpfYswzx^=BlLbErbXi^N~v@EWi-ahLpm3e
z0eeSqG>Zxy0zC*#SrRBRh(sI%uCB3h{+^6SRe1^)hq15}iz{>x(8AN%phgFV>4fqa
zq>k|niJV*mB)~WkS7I(hfaGA!=8f5mkcoh8VyznXShNy9Fs|8Uf|MbX+!m*UN&y{o
z+7nhb9t|)wAJi$5Xg;J9TZ}O>ED1`%evmohvbz-tp39-(@T_$Q6h2`j9k7880#R{1
zgkiiCVIpzPi7;4_l1cf1(m){(%AzuJo^b=UfR}d#Y{m>2l!h^KAy$?n(Ht*J#j-*p
zE)NoM#2_(-(?-^5<5@*;%*S^*Y@TqMAGdPhkJ9_Sq?h73V!W&dGA2T59b_{3QsJ~G
z?eO>_X25CfDT9xOqba%B!=)vAD6frcpsd)EvO5%zNM!JXJqM{kYNvQ`%u72H4w+G`
zl+c`)xvZ*C(urE5Y+9?+SRqM s?Z2!R=U#wer
z4`?g3Cx&08=V<%+zTGGLJ>05~tHY%>?+;r!XJrd;&guTBL(q=@9 $mhq}brN+QvAi0oHU3l1o=rCszULJ=?PDmKYu1RBNsJNnLW!>}8kjZ4K1+CK
zbywL3&3M@~S0fP$U>EcEz9+rB>7`Va=XwC9%@KWt 7;Ok)BjQqbmwd^xi{sAVd(y$A$rN>5HxeWaO%e$f8AYDy0JbHWXu5^s
z*1Mg@60w!1oR&xA-y@QO$-?Ey<9%IUE_?4kItgeIYf02<_x)j(Bfu8H>k|}*7pK3w
zLh65E|MePU+lQM$q~1y+B0#`ceWw!eK(VPW%&HI(q^RBzcE0ryswYc;=JZ~)WJC=0
zukL?==0!WI|F*yt6GU_99z|TRXlKp4A8n*IDxdv@A<0g-!_1Px;T1hkv)5Zv0!+?r
z9iJCK)rEap5Y2#_?SyKHu}W2$-lTTo340_FCeCfJ <%?nf^_fCi02Z%<%)POc$?p-o&Xftq+fSyL+rtpLf+4g34d#m$%|JM$Ouxa
z&F!lms^jToH?&G?Uw%|1?($E}xdrF3^5xry+cuwvgb3OH?!K4r8Q1f8v~s^W&yMH;
z?vRO031RT|-Q5^i!dNb-9f(zYya%sFX~Js1^nDgV;}eRkcCJ0X3HYL>mC**9pa1wN
znxiH9DCX`^HH-}=_%X@`V^PT@fDRUJF>mpIXP5Lw3G-PvL0j)RM$R?fHf1S3LNBEA
zypsIvKU~XTHkixB$%PqJvN&p Bw}|
zi!mU7kyT^AUe}RZ>!y}nmCNF2nW5FqBI Dfvz%R&AONc*8fLwzt;3EH5uYB8_<`4m>igb9IPp
z0L5M~{uQuj+7TibY9tYouqy=^`}Ev&JkMuolVlP
@%I$y;Y#WFH;mqM
zWZSMs*eN8bs7OXod@K+;c4wu-G3a&Z)%ey))%}ENk8KxZz3XgWe>>_z-M@Fw{c^o&
zq|y%^