diff --git a/static/quickstart_docker/Dockerfile_delta_quickstart b/static/quickstart_docker/Dockerfile_delta_quickstart index a7c7dea..b200483 100644 --- a/static/quickstart_docker/Dockerfile_delta_quickstart +++ b/static/quickstart_docker/Dockerfile_delta_quickstart @@ -18,20 +18,23 @@ # Dockerfile for Delta Lake quickstart # ------------------------------------------------ -# This docker image uses the official Docker image of [OSS] Apache Spark v3.3.2 as the base container +# This docker image uses the official Docker image of [OSS] Apache Spark v3.5.0 as the base container # Note: Python version in this image is 3.9.2 and is available as `python3`. -ARG BASE_CONTAINER=apache/spark-py:v3.3.2 +# Note: for the PySpark v3.5.0 dependency requirements, see https://spark.apache.org/docs/latest/api/python/getting_started/install.html#dependencies +ARG BASE_CONTAINER=spark:3.5.0-scala2.12-java11-python3-ubuntu FROM $BASE_CONTAINER as spark FROM spark as delta # Authors (add your name when updating the Dockerfile) -LABEL authors="Prashanth Babu,Denny Lee,Andrew Bauman" +LABEL authors="Prashanth Babu,Denny Lee,Andrew Bauman, Scott Haines" # Docker image was created and tested with the versions of following packages. 
USER root -ARG DELTA_SPARK_VERSION="2.3.0" -ARG DELTALAKE_VERSION="0.9.0" -ARG JUPYTERLAB_VERSION="3.6.3" +ARG DELTA_SPARK_VERSION="3.0.0" +# Note: for 3.0.0 https://pypi.org/project/deltalake/ +ARG DELTALAKE_VERSION="0.12.0" +ARG JUPYTERLAB_VERSION="4.0.7" +# requires pandas >1.0.5, py4j>=0.10.9.7, pyarrow>=4.0.0 ARG PANDAS_VERSION="1.5.3" ARG ROAPI_VERSION="0.9.0" @@ -45,7 +48,7 @@ FROM delta as startup ARG NBuser=NBuser ARG GROUP=NBuser ARG WORKDIR=/opt/spark/work-dir -ENV DELTA_PACKAGE_VERSION=delta-core_2.12:${DELTA_SPARK_VERSION} +ENV DELTA_PACKAGE_VERSION=delta-spark_2.12:${DELTA_SPARK_VERSION} # OS Installations Configurations RUN groupadd -r ${GROUP} && useradd -r -m -g ${GROUP} ${NBuser} @@ -62,7 +65,8 @@ RUN chown -R ${NBuser}:${GROUP} /home/${NBuser}/ \ # Rust install USER ${NBuser} RUN curl https://sh.rustup.rs -sSf | sh -s -- -y -RUN source "$HOME/.cargo/env" +# moved the source command into the bash process in the entrypoint startup.sh +#RUN source "$HOME/.cargo/env" # Establish entrypoint ENTRYPOINT ["bash", "startup.sh"] \ No newline at end of file diff --git a/static/quickstart_docker/README.md b/static/quickstart_docker/README.md index 550ac00..b89f0a2 100644 --- a/static/quickstart_docker/README.md +++ b/static/quickstart_docker/README.md @@ -45,10 +45,12 @@ Note, there are different versions of the Delta Lake docker | ----------------- | -------- | ------ | ------ | ----------- | ----- | ---------- | ------ | ----- | | 0.8.1_2.3.0 | amd64 | 0.8.1 | latest | 2.3.0 | 3.3.2 | 3.6.3 | 1.5.3 | 0.9.0 | | 0.8.1_2.3.0_arm64 | arm64 | 0.8.1 | latest | 2.3.0 | 3.3.2 | 3.6.3 | 1.5.3 | 0.9.0 | -| latest | amd64 | 0.9.0 | latest | 2.3.0 | 3.3.2 | 3.6.3 | 1.5.3 | 0.9.0 | -| latest | arm64 | 0.9.0 | latest | 2.3.0 | 3.3.2 | 3.6.3 | 1.5.3 | 0.9.0 | +| 1.0.0_3.0.0 | amd64 | 0.12.0 | latest | 3.0.0 | 3.5.0 | 4.0.7 | 1.5.3 | 0.9.0 | +| 1.0.0_3.0.0_arm64 | arm64 | 0.12.0 | latest | 3.0.0 | 3.5.0 | 4.0.7 | 1.5.3 | 0.9.0 | +| latest | amd64 | 0.12.0 | latest | 3.0.0 
| 3.5.0 | 4.0.7 | 1.5.3 | 0.9.0 | +| latest | arm64 | 0.12.0 | latest | 3.0.0 | 3.5.0 | 4.0.7 | 1.5.3 | 0.9.0 | -\*\* Note, the arm64 version is built for ARM64 platforms like Mac M1 +> Note, the arm64 version is built for ARM64 platforms like Mac M1 Download the appropriate tag, e.g.: @@ -75,7 +77,7 @@ Once the image has been built or you have downloaded the correct image, you can In the following instructions, the variable `${DELTA_PACKAGE_VERSION}` refers to the Delta Lake Package version. -The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark 3.3.x release line. +The current version is `delta-spark_2.12:3.0.0` which corresponds to Apache Spark 3.5.x release line. ## Choose an Interface @@ -98,7 +100,7 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark python3 ``` - > Note: The Delta Rust Python bindings are already installed in this docker. To do this manually in your own environment, run the command: `pip3 install deltalake==0.9.0` + > Note: The Delta Rust Python bindings are already installed in this docker. To do this manually in your own environment, run the command: `pip3 install deltalake==0.12.0` 1. Run some basic commands in the shell to write to and read from Delta Lake with Pandas @@ -126,13 +128,13 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ```python ## Output - 0 - 0 0 - 1 1 - 2 2 - ... ... - 8 9 - 9 10 + data + 0 0 + 1 1 + 2 2 + ... + 8 9 + 9 10 ``` 1. Review the files @@ -144,7 +146,7 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ```python ## Output - ['0-d4920663-30e9-4a1a-afde-59bc4ebd24b5-0.parquet', '1-f27a5ea6-a15f-4ca1-91b3-72bcf64fbc09-0.parquet'] + ['0-6944fddf-60e3-4eab-811d-1398e9f64073-0.parquet', '1-66c7ee6e-6aab-4c74-866d-a82790102652-0.parquet'] ``` 1. 
Review history @@ -156,7 +158,7 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ```python ## Output - [{'timestamp': 1682475171964, 'delta-rs': '0.8.0'}, {'timestamp': 1682475171985, 'operation': 'WRITE', 'operationParameters': {'partitionBy': '[]', 'mode': 'Append'}, 'clientVersion': 'delta-rs.0.8.0'}] + [{'timestamp': 1698002214493, 'operation': 'WRITE', 'operationParameters': {'mode': 'Append', 'partitionBy': '[]'}, 'clientVersion': 'delta-rs.0.17.0', 'version': 1}, {'timestamp': 1698002207527, 'operation': 'CREATE TABLE', 'operationParameters': {'mode': 'ErrorIfExists', 'protocol': '{"minReaderVersion":1,"minWriterVersion":1}', 'location': 'file:///tmp/deltars_table', 'metadata': '{"configuration":{},"created_time":1698002207525,"description":null,"format":{"options":{},"provider":"parquet"},"id":"bf749aab-22b6-484b-bd73-dc1680ee4384","name":null,"partition_columns":[],"schema":{"fields":[{"metadata":{},"name":"data","nullable":true,"type":"long"}],"type":"struct"}}'}, 'clientVersion': 'delta-rs.0.17.0', 'version': 0}] ``` 1. Time Travel (load older version of table) @@ -171,12 +173,12 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ```python ## Output - 0 - 0 0 - 1 1 - 2 2 - 3 3 - 4 4 + data + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 ``` 1. 
Follow the delta-rs Python documentation [here](https://delta-io.github.io/delta-rs/python/usage.html#) @@ -189,9 +191,9 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ```bash total 12 - 4 drwxr-xr-x 2 NBuser 4096 Apr 26 02:12 _delta_log - 4 -rw-r--r-- 1 NBuser 1689 Apr 26 02:12 0-d4920663-30e9-4a1a-afde-59bc4ebd24b5-0.parquet - 4 -rw-r--r-- 1 NBuser 1691 Apr 26 02:12 1-f27a5ea6-a15f-4ca1-91b3-72bcf64fbc09-0.parquet + 4 -rw-r--r-- 1 NBuser 1689 Oct 22 19:16 0-6944fddf-60e3-4eab-811d-1398e9f64073-0.parquet + 4 -rw-r--r-- 1 NBuser 1691 Oct 22 19:16 1-66c7ee6e-6aab-4c74-866d-a82790102652-0.parquet + 4 drwxr-xr-x 2 NBuser 4096 Oct 22 19:16 _delta_log ``` 1. [Optional] Skip ahead to try out the [Delta Rust API](#delta-rust-api) and [ROAPI](#optional-roapi) @@ -225,11 +227,15 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark 3. Launch a pyspark interactive shell session ```bash + $SPARK_HOME/bin/pyspark --packages io.delta:${DELTA_PACKAGE_VERSION} \ + --conf spark.driver.extraJavaOptions="-Divy.cache.dir=/tmp -Divy.home=/tmp" \ --conf "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension" \ --conf "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" ``` + > Note: `DELTA_PACKAGE_VERSION` is set in `./startup.sh` + 4. 
Run some basic commands in the shell ```python @@ -277,16 +283,20 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ``` ```bash - total 36 - 4 drwxr-xr-x 2 NBuser 4096 Apr 26 02:30 _delta_log - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:30 .part-00000-bdee316b-8623-4423-b59c-6a809addaea8-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:30 .part-00001-6b373d50-5bdd-496a-9e21-ab4164176f11-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:30 .part-00002-9721ce9e-e043-4875-bcff-08f7d7c3d3f0-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:30 .part-00003-61aaf450-c318-452a-aea5-5a44c909fd74-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 478 Apr 26 02:30 part-00000-bdee316b-8623-4423-b59c-6a809addaea8-c000.snappy.parquet - 4 -rw-r--r-- 1 NBuser 478 Apr 26 02:30 part-00001-6b373d50-5bdd-496a-9e21-ab4164176f11-c000.snappy.parquet - 4 -rw-r--r-- 1 NBuser 478 Apr 26 02:30 part-00002-9721ce9e-e043-4875-bcff-08f7d7c3d3f0-c000.snappy.parquet - 4 -rw-r--r-- 1 NBuser 486 Apr 26 02:30 part-00003-61aaf450-c318-452a-aea5-5a44c909fd74-c000.snappy.parquet + total 52 + 4 drwxr-xr-x 2 NBuser 4096 Oct 22 19:23 _delta_log + 4 -rw-r--r-- 1 NBuser 296 Oct 22 19:23 part-00000-dc0fd6b3-9c0f-442f-a6db-708301b27bd2-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:23 .part-00000-dc0fd6b3-9c0f-442f-a6db-708301b27bd2-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:23 part-00001-d379441e-1ee4-4e78-8616-1d9635df1c7b-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:23 .part-00001-d379441e-1ee4-4e78-8616-1d9635df1c7b-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:23 part-00003-c08dcac4-5ea9-4329-b85d-9110493e8757-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:23 .part-00003-c08dcac4-5ea9-4329-b85d-9110493e8757-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:23 part-00005-5db8dd16-2ab1-4d76-9b4d-457c5641b1c8-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:23 
.part-00005-5db8dd16-2ab1-4d76-9b4d-457c5641b1c8-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:23 part-00007-cad760e0-3c26-4d22-bed6-7d75a9459a0f-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:23 .part-00007-cad760e0-3c26-4d22-bed6-7d75a9459a0f-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:23 part-00009-b58e8445-07b7-4e2a-9abf-6fea8d0c3e3f-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:23 .part-00009-b58e8445-07b7-4e2a-9abf-6fea8d0c3e3f-c000.snappy.parquet.crc ``` ### Scala Shell @@ -299,17 +309,21 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ```bash $SPARK_HOME/bin/spark-shell --packages io.delta:${DELTA_PACKAGE_VERSION} \ + --conf spark.driver.extraJavaOptions="-Divy.cache.dir=/tmp -Divy.home=/tmp" \ --conf "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension" \ --conf "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" ``` 4. Run some basic commands in the shell + > note: if you've already written to the Delta table in the python shell example, use `.mode("overwrite")` to overwrite the current delta table. You can always time-travel to rewind. 
+ ```scala // Create a Spark DataFrame val data = spark.range(0, 5) // Write to a Delta Lake table + (data .write .format("delta") @@ -350,22 +364,29 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ``` ```bash - total 36 - 4 drwxr-xr-x 2 NBuser 4096 Apr 26 02:31 _delta_log - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:31 .part-00000-e0353d3e-7473-4ff7-9b58-e977d48d008a-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:31 .part-00001-0e2c89cf-3f9b-4698-b059-6dd41d4e3aed-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:31 .part-00002-06bf68f9-16d8-4c08-ba8e-7b0b00d52b8e-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 12 Apr 26 02:31 .part-00003-5963f002-d98a-421f-9c2d-22376b7f87e4-c000.snappy.parquet.crc - 4 -rw-r--r-- 1 NBuser 478 Apr 26 02:31 part-00000-e0353d3e-7473-4ff7-9b58-e977d48d008a-c000.snappy.parquet - 4 -rw-r--r-- 1 NBuser 478 Apr 26 02:31 part-00001-0e2c89cf-3f9b-4698-b059-6dd41d4e3aed-c000.snappy.parquet - 4 -rw-r--r-- 1 NBuser 478 Apr 26 02:31 part-00002-06bf68f9-16d8-4c08-ba8e-7b0b00d52b8e-c000.snappy.parquet - 4 -rw-r--r-- 1 NBuser 486 Apr 26 02:31 part-00003-5963f002-d98a-421f-9c2d-22376b7f87e4-c000.snappy.parquet + total 52 + 4 drwxr-xr-x 2 NBuser 4096 Oct 22 19:28 _delta_log + 4 -rw-r--r-- 1 NBuser 296 Oct 22 19:28 part-00000-f1f417f7-df64-4c7c-96f2-6a452ae2b49e-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:28 .part-00000-f1f417f7-df64-4c7c-96f2-6a452ae2b49e-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:28 part-00001-b28acb6f-f08a-460f-a24e-4d9c1affee86-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:28 .part-00001-b28acb6f-f08a-460f-a24e-4d9c1affee86-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:28 part-00003-29079c58-d1ad-4604-9c04-0f00bf09546d-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:28 .part-00003-29079c58-d1ad-4604-9c04-0f00bf09546d-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:28 
part-00005-04424aa7-48e1-4212-bd57-52552c713154-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:28 .part-00005-04424aa7-48e1-4212-bd57-52552c713154-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:28 part-00007-e7a54a4f-bee4-4371-a35d-d284e28eb9f8-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:28 .part-00007-e7a54a4f-bee4-4371-a35d-d284e28eb9f8-c000.snappy.parquet.crc + 4 -rw-r--r-- 1 NBuser 478 Oct 22 19:28 part-00009-086e6cd9-e8c6-4f16-9658-b15baf22905d-c000.snappy.parquet + 4 -rw-r--r-- 1 NBuser 12 Oct 22 19:28 .part-00009-086e6cd9-e8c6-4f16-9658-b15baf22905d-c000.snappy.parquet.crc ``` ### Delta Rust API +> Note: Use a Docker volume if you run into disk-space limits ("no space left on device") +> `docker volume create rustbuild` > `docker run --name delta_quickstart -v rustbuild:/tmp --rm -it --entrypoint bash deltaio/delta-docker:3.0.0` + 1. Open a bash shell (if on windows use git bash, WSL, or any shell configured for bash commands) 2. Run a container from the image with a bash entrypoint ([build](#build-entry-point) | [DockerHub](#image-entry-point)) @@ -377,11 +398,18 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark cargo run --example read_delta_table ``` + > You can also use a different location to build and run the examples + + ```bash + cd rs + CARGO_TARGET_DIR=/tmp cargo run --example read_delta_table + ``` + > If using [Delta Lake DockerHub](https://go.delta.io/dockerhub), sometimes the Rust environment hasn't been configured. 
To resolve this, run the command `source "$HOME/.cargo/env"` ```bash === Delta table metadata === - DeltaTable(../quickstart_docker/rs/data/COVID-19_NYT) + DeltaTable(/opt/spark/work-dir/rs/data/COVID-19_NYT) version: 0 metadata: GUID=7245fd1d-8a6d-4988-af72-92a95b646511, name=None, description=None, partitionColumns=[], createdTime=Some(1619121484605), configuration={} min_version: read=1, write=2 @@ -389,16 +417,7 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark === Delta table files === - [ - Path { raw: "part-00000-a496f40c-e091-413a-85f9-b1b69d4b3b4e-c000.snappy.parquet" }, - Path { raw: "part-00001-9d9d980b-c500-4f0b-bb96-771a515fbccc-c000.snappy.parquet" }, - Path { raw: "part-00002-8826af84-73bd-49a6-a4b9-e39ffed9c15a-c000.snappy.parquet" }, - Path { raw: "part-00003-539aff30-2349-4b0d-9726-c18630c6ad90-c000.snappy.parquet" }, - Path { raw: "part-00004-1bb9c3e3-c5b0-4d60-8420-23261f58a5eb-c000.snappy.parquet" }, - Path { raw: "part-00005-4d47f8ff-94db-4d32-806c-781a1cf123d2-c000.snappy.parquet" }, - Path { raw: "part-00006-d0ec7722-b30c-4e1c-92cd-b4fe8d3bb954-c000.snappy.parquet" }, - Path { raw: "part-00007-4582392f-9fc2-41b0-ba97-a74b3afc8239-c000.snappy.parquet" } - ] + [Path { raw: "part-00000-a496f40c-e091-413a-85f9-b1b69d4b3b4e-c000.snappy.parquet" }, Path { raw: "part-00001-9d9d980b-c500-4f0b-bb96-771a515fbccc-c000.snappy.parquet" }, Path { raw: "part-00002-8826af84-73bd-49a6-a4b9-e39ffed9c15a-c000.snappy.parquet" }, Path { raw: "part-00003-539aff30-2349-4b0d-9726-c18630c6ad90-c000.snappy.parquet" }, Path { raw: "part-00004-1bb9c3e3-c5b0-4d60-8420-23261f58a5eb-c000.snappy.parquet" }, Path { raw: "part-00005-4d47f8ff-94db-4d32-806c-781a1cf123d2-c000.snappy.parquet" }, Path { raw: "part-00006-d0ec7722-b30c-4e1c-92cd-b4fe8d3bb954-c000.snappy.parquet" }, Path { raw: "part-00007-4582392f-9fc2-41b0-ba97-a74b3afc8239-c000.snappy.parquet" }] ``` 4. 
Execute `examples/read_delta_datafusion.rs` to query the `covid19_nyt` Delta Lake table using `datafusion` @@ -408,37 +427,29 @@ The current version is `delta-core_2.12:2.3.0` which corresponds to Apache Spark ``` ```bash + === Datafusion query === + [RecordBatch { schema: Schema { fields: [Field { name: "cases", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: "county", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: "date", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }], metadata: {} }, columns: [PrimitiveArray [ - RecordBatch { - schema: Schema { - fields: [ - Field { name: "cases", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }, - Field { name: "county", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }, - Field { name: "date", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None } - ], metadata: {} - }, - columns: [PrimitiveArray[ - 1, - 1, - 1, - 1, - 1, - ], StringArray [ - "Snohomish", - "Snohomish", - "Snohomish", - "Cook", - "Snohomish", - ], StringArray [ - "2020-01-21", - "2020-01-22", - "2020-01-23", - "2020-01-24", - "2020-01-24", - ]], - row_count: 5 - } - ] + 1, + 1, + 1, + 1, + 1, + ], StringArray + [ + "Snohomish", + "Snohomish", + "Snohomish", + "Cook", + "Snohomish", + ], StringArray + [ + "2020-01-21", + "2020-01-22", + "2020-01-23", + "2020-01-24", + "2020-01-24", + ]], row_count: 5 }] ```

diff --git a/static/quickstart_docker/rs/Cargo.toml b/static/quickstart_docker/rs/Cargo.toml index aa2ebf7..e7b8034 100644 --- a/static/quickstart_docker/rs/Cargo.toml +++ b/static/quickstart_docker/rs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "quickstart" -version = "0.1.1" -rust-version = "1.64" +version = "0.1.2" +rust-version = "1.73" authors = ["Denny Lee "] license = "Apache-2.0" keywords = ["deltalake", "delta", "datalake", "deltars"] diff --git a/static/quickstart_docker/startup.sh b/static/quickstart_docker/startup.sh index 9d888b3..04025b5 100644 --- a/static/quickstart_docker/startup.sh +++ b/static/quickstart_docker/startup.sh @@ -1,8 +1,13 @@ #!/bin/bash +source "$HOME/.cargo/env" + export PYSPARK_DRIVER_PYTHON=jupyter export PYSPARK_DRIVER_PYTHON_OPTS='lab --ip=0.0.0.0' +export DELTA_SPARK_VERSION='3.0.0' +export DELTA_PACKAGE_VERSION=delta-spark_2.12:${DELTA_SPARK_VERSION} $SPARK_HOME/bin/pyspark --packages io.delta:${DELTA_PACKAGE_VERSION} \ ---conf "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension" \ ---conf "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" \ No newline at end of file + --conf "spark.driver.extraJavaOptions=-Divy.cache.dir=/tmp -Divy.home=/tmp -Dio.netty.tryReflectionSetAccessible=true" \ + --conf "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension" \ + --conf "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog"