From 43a448e25889fc47095750b3fc7817ae56f78d29 Mon Sep 17 00:00:00 2001 From: Nilesh Gajwani Date: Mon, 16 Oct 2023 17:32:00 -0400 Subject: [PATCH] Refactor notebooks to include SedonaKepler and Sedona 1.5.0 Refactor examples folder to include correct updated Sedona version --- binder/ApacheSedonaSQL.ipynb | 432 ++---------------- ...naSQL_SpatialJoin_AirportsPerCountry.ipynb | 60 +-- binder/Pipfile | 3 +- examples/flink-sql/pom.xml | 2 +- .../spark-rdd-colocation-mining/build.sbt | 2 +- examples/spark-sql/build.sbt | 2 +- examples/spark-viz/build.sbt | 2 +- 7 files changed, 79 insertions(+), 424 deletions(-) diff --git a/binder/ApacheSedonaSQL.ipynb b/binder/ApacheSedonaSQL.ipynb index a58c264eed..cfba2d8035 100644 --- a/binder/ApacheSedonaSQL.ipynb +++ b/binder/ApacheSedonaSQL.ipynb @@ -24,8 +24,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, + "execution_count": null, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import os\n", @@ -38,54 +40,15 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ":: loading settings :: url = jar:file:/Users/nileshgajwani/Desktop/spark/spark-3.4.0-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n", - "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n", - "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n", - "org.datasyslab#geotools-wrapper added as a dependency\n", - ":: resolving dependencies :: org.apache.spark#spark-submit-parent-ade932f0-a9e8-47af-b559-0d52a6a087e9;1.0\n", - "\tconfs: [default]\n", - "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in central\n", - "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n", - ":: resolution report :: resolve 81ms :: artifacts dl 2ms\n", - "\t:: modules in use:\n", - "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in [default]\n", - "\torg.datasyslab#geotools-wrapper;1.4.0-28.2 from central in [default]\n", - "\t---------------------------------------------------------------------\n", - "\t| | modules || artifacts |\n", - "\t| conf | number| search|dwnlded|evicted|| number|dwnlded|\n", - "\t---------------------------------------------------------------------\n", - "\t| default | 2 | 0 | 0 | 0 || 2 | 0 |\n", - "\t---------------------------------------------------------------------\n", - ":: retrieving :: org.apache.spark#spark-submit-parent-ade932f0-a9e8-47af-b559-0d52a6a087e9\n", - "\tconfs: [default]\n", - "\t0 artifacts copied, 2 already retrieved (0kB/2ms)\n", - "23/07/03 21:13:44 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", - "Setting default log level to \"WARN\".\n", - "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", - "23/07/03 21:13:44 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n", - "23/07/03 21:13:44 WARN Utils: Service 'SparkUI' could not bind on port 4041. 
Attempting port 4042.\n", - " \r" - ] - } - ], + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "config = SedonaContext.builder() .\\\n", " config('spark.jars.packages',\n", - " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n", + " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.5.0,'\n", " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \\\n", " getOrCreate()\n", "\n", @@ -108,27 +71,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----------------+\n", - "| arealandmark|\n", - "+-----------------+\n", - "|POINT (1.1 101.1)|\n", - "|POINT (2.1 102.1)|\n", - "|POINT (3.1 103.1)|\n", - "|POINT (4.1 104.1)|\n", - "|POINT (5.1 105.1)|\n", - "+-----------------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "point_csv_df = sedona.read.format(\"csv\").\\\n", " option(\"delimiter\", \",\").\\\n", @@ -150,27 +95,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------------+--------------------+\n", - "| name| countyshape|\n", - "+----------------+--------------------+\n", - "| Cuming County|POLYGON ((-97.019...|\n", - "|Wahkiakum County|POLYGON ((-123.43...|\n", - "| De Baca County|POLYGON ((-104.56...|\n", - "|Lancaster County|POLYGON ((-96.910...|\n", - "| Nuckolls County|POLYGON ((-98.273...|\n", - "+----------------+--------------------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "polygon_wkt_df = sedona.read.format(\"csv\").\\\n", " option(\"delimiter\", \"\\t\").\\\n", @@ -191,27 +118,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------------+--------------------+\n", - "| name| countyshape|\n", - "+----------------+--------------------+\n", - "| Cuming County|POLYGON ((-97.019...|\n", - "|Wahkiakum County|POLYGON ((-123.43...|\n", - "| De Baca County|POLYGON ((-104.56...|\n", - "|Lancaster County|POLYGON ((-96.910...|\n", - "| Nuckolls County|POLYGON ((-98.273...|\n", - "+----------------+--------------------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "polygon_wkb_df = sedona.read.format(\"csv\").\\\n", " option(\"delimiter\", \"\\t\").\\\n", @@ -232,27 +141,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+\n", - "| countyshape|\n", - "+--------------------+\n", - "|POLYGON ((-87.621...|\n", - "|POLYGON ((-85.719...|\n", - "|POLYGON ((-86.000...|\n", - "|POLYGON ((-86.574...|\n", - "|POLYGON ((-85.382...|\n", - "+--------------------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "polygon_json_df = sedona.read.format(\"csv\").\\\n", " option(\"delimiter\", \"\\t\").\\\n", @@ -280,36 +171,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "== Physical Plan ==\n", - "BroadcastIndexJoin pointshape2#253: geometry, LeftSide, LeftSide, Inner, INTERSECTS, ( **org.apache.spark.sql.sedona_sql.expressions.ST_Distance** < 
2.0) ST_INTERSECTS(pointshape1#228, pointshape2#253)\n", - ":- SpatialIndex pointshape1#228: geometry, QUADTREE, false, 2.0\n", - ": +- Project [ **org.apache.spark.sql.sedona_sql.expressions.ST_Point** AS pointshape1#228, abc AS name1#229]\n", - ": +- FileScan csv [_c0#224,_c1#225] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/nileshgajwani/Desktop/sedona/sedona/binder/data/testpoint...., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n", - "+- Project [ **org.apache.spark.sql.sedona_sql.expressions.ST_Point** AS pointshape2#253, def AS name2#254]\n", - " +- FileScan csv [_c0#249,_c1#250] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/nileshgajwani/Desktop/sedona/sedona/binder/data/testpoint...., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n", - "\n", - "\n", - "+-----------------+-----+-----------------+-----+\n", - "| pointshape1|name1| pointshape2|name2|\n", - "+-----------------+-----+-----------------+-----+\n", - "|POINT (1.1 101.1)| abc|POINT (1.1 101.1)| def|\n", - "|POINT (2.1 102.1)| abc|POINT (1.1 101.1)| def|\n", - "|POINT (1.1 101.1)| abc|POINT (2.1 102.1)| def|\n", - "|POINT (2.1 102.1)| abc|POINT (2.1 102.1)| def|\n", - "|POINT (3.1 103.1)| abc|POINT (2.1 102.1)| def|\n", - "+-----------------+-----+-----------------+-----+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "point_csv_df_1 = sedona.read.format(\"csv\").\\\n", " option(\"delimiter\", \",\").\\\n", @@ -351,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -365,57 +229,25 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root\n", - " |-- osm_id: string (nullable = true)\n", - " |-- code: long (nullable = true)\n", - " |-- fclass: string (nullable = true)\n", - " |-- name: string (nullable = true)\n", - " |-- geometry: geometry (nullable = true)\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "osm_points.printSchema()" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------+----+---------+--------------+--------------------+\n", - "| osm_id|code| fclass| name| geometry|\n", - "+--------+----+---------+--------------+--------------------+\n", - "|26860257|2422|camp_site| de Kroon|POINT (15.3393145...|\n", - "|26860294|2406| chalet|Leśne Ustronie|POINT (14.8709625...|\n", - "|29947493|2402| motel| |POINT (15.0946636...|\n", - "|29947498|2602| atm| |POINT (15.0732014...|\n", - "|29947499|2401| hotel| |POINT (15.0696777...|\n", - "+--------+----+---------+--------------+--------------------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "osm_points.show(5)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -424,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -441,34 +273,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"+--------+----+---------+--------------+--------------------+\n", - "| osm_id|code| fclass| name| geom|\n", - "+--------+----+---------+--------------+--------------------+\n", - "|26860257|2422|camp_site| de Kroon|POINT (-3288183.3...|\n", - "|26860294|2406| chalet|Leśne Ustronie|POINT (-3341183.9...|\n", - "|29947493|2402| motel| |POINT (-3320466.5...|\n", - "|29947498|2602| atm| |POINT (-3323205.7...|\n", - "|29947499|2401| hotel| |POINT (-3323655.1...|\n", - "+--------+----+---------+--------------+--------------------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "transformed_df.show(5)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -477,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -492,49 +306,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/07/03 21:13:53 WARN JoinQuery: UseIndex is true, but no index exists. Will build index on the fly.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------+---------+--------------------+\n", - "| id_1| id_2| geom|\n", - "+----------+---------+--------------------+\n", - "| 197624402|197624402|POINT (-3383818.5...|\n", - "| 197663196|197663196|POINT (-3383367.1...|\n", - "| 197953474|197953474|POINT (-3383763.3...|\n", - "| 262310516|262310516|POINT (-3384257.6...|\n", - "|1074233123|262310516|POINT (-3384262.1...|\n", - "| 270281140|270281140|POINT (-3385421.2...|\n", - "|1074232906|270281140|POINT (-3385408.6...|\n", - "| 270306609|270306609|POINT (-3383982.8...|\n", - "| 270306746|270306746|POINT (-3383898.4...|\n", - "| 280402616|280402616|POINT (-3378817.6...|\n", - "| 839725400|280402616|POINT (-3378841.1...|\n", - "| 293896571|293896571|POINT (-3385029.0...|\n", - "|3256728465|293896571|POINT (-3385002.4...|\n", - "| 310838954|310838954|POINT (-3390510.5...|\n", - "| 311395303|311395303|POINT (-3389444.4...|\n", - "| 311395425|311395425|POINT (-3389867.6...|\n", - "|6339786017|311395425|POINT (-3389850.1...|\n", - "| 825853330|311395425|POINT (-3389877.4...|\n", - "| 945009922|311395425|POINT (-3389878.6...|\n", - "| 320100848|320100848|POINT (-3389610.6...|\n", - "+----------+---------+--------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "neighbours_within_1000m.show()" ] @@ -548,24 +322,16 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/07/03 21:13:54 WARN JoinQuery: UseIndex is true, but no index exists. Will build index on the fly.\n" - ] - } - ], + "outputs": [], "source": [ "df = neighbours_within_1000m.toPandas()" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -574,129 +340,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_1id_2geom
0197624402197624402POINT (-3383818.580 4179182.169)
1197663196197663196POINT (-3383367.151 4179427.096)
2197953474197953474POINT (-3383763.332 4179408.785)
3262310516262310516POINT (-3384257.682 4178033.053)
41074233123262310516POINT (-3384262.187 4178036.442)
............
4531467855483546785548354POINT (-3271487.870 4337964.529)
4531567855483566785548356POINT (-3273379.389 4338379.126)
4531667855483576785548357POINT (-3273745.222 4338528.241)
4531767855483586785548358POINT (-3273027.996 4338093.401)
4531868174167046817416704POINT (-3214549.268 4314872.904)
\n", - "

45319 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " id_1 id_2 geom\n", - "0 197624402 197624402 POINT (-3383818.580 4179182.169)\n", - "1 197663196 197663196 POINT (-3383367.151 4179427.096)\n", - "2 197953474 197953474 POINT (-3383763.332 4179408.785)\n", - "3 262310516 262310516 POINT (-3384257.682 4178033.053)\n", - "4 1074233123 262310516 POINT (-3384262.187 4178036.442)\n", - "... ... ... ...\n", - "45314 6785548354 6785548354 POINT (-3271487.870 4337964.529)\n", - "45315 6785548356 6785548356 POINT (-3273379.389 4338379.126)\n", - "45316 6785548357 6785548357 POINT (-3273745.222 4338528.241)\n", - "45317 6785548358 6785548358 POINT (-3273027.996 4338093.401)\n", - "45318 6817416704 6817416704 POINT (-3214549.268 4314872.904)\n", - "\n", - "[45319 rows x 3 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "gdf" ] @@ -729,5 +375,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb b/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb index 225e6ad8e2..2f0b49480a 100644 --- a/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb +++ b/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb @@ -37,7 +37,6 @@ "\n", "\n", "from sedona.spark import *\n", - "from keplergl import KeplerGl\n", "from utilities import getConfig\n", "\n" ] @@ -54,28 +53,36 @@ "execution_count": 2, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/16 17:17:34 WARN Utils: Your hostname, Nileshs-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 172.24.19.124 instead (on interface en0)\n", + "23/10/16 17:17:34 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n", + "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n", + "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n", + "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n", + "org.datasyslab#geotools-wrapper added as a dependency\n", + ":: resolving dependencies :: org.apache.spark#spark-submit-parent-1d9bf0a6-87f2-4251-a8da-102cb544a8f9;1.0\n", + "\tconfs: [default]\n", + "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.5.0 in central\n", + "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - ":: loading settings :: url = jar:file:/Users/nileshgajwani/Desktop/spark/spark-3.4.0-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n" + ":: loading settings :: url = jar:file:/Users/nileshgajwani/Downloads/spark-3.4.1-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n", - "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n", - "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n", - "org.datasyslab#geotools-wrapper added as a dependency\n", - ":: resolving dependencies :: org.apache.spark#spark-submit-parent-2ebc22b4-bd08-4a3f-a2dc-bd50e2f0f728;1.0\n", - "\tconfs: [default]\n", - "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in central\n", - "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n", - ":: resolution report :: resolve 85ms :: artifacts dl 3ms\n", + ":: resolution report :: resolve 67ms :: artifacts dl 2ms\n", "\t:: modules in 
use:\n", - "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in [default]\n", + "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.5.0 from central in [default]\n", "\torg.datasyslab#geotools-wrapper;1.4.0-28.2 from central in [default]\n", "\t---------------------------------------------------------------------\n", "\t| | modules || artifacts |\n", @@ -83,20 +90,24 @@ "\t---------------------------------------------------------------------\n", "\t| default | 2 | 0 | 0 | 0 || 2 | 0 |\n", "\t---------------------------------------------------------------------\n", - ":: retrieving :: org.apache.spark#spark-submit-parent-2ebc22b4-bd08-4a3f-a2dc-bd50e2f0f728\n", + ":: retrieving :: org.apache.spark#spark-submit-parent-1d9bf0a6-87f2-4251-a8da-102cb544a8f9\n", "\tconfs: [default]\n", - "\t0 artifacts copied, 2 already retrieved (0kB/2ms)\n", - "23/07/12 14:17:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", + "\t0 artifacts copied, 2 already retrieved (0kB/3ms)\n", + "23/10/16 17:17:35 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", - "23/07/12 14:17:43 WARN SimpleFunctionRegistry: The function st_affine replaced a previously registered function.\n" + "Error while parsing JAI registry file \"file:/Users/nileshgajwani/Downloads/spark-3.4.1-bin-hadoop3/jars/sedona-spark-shaded-3.4_2.12-1.5.1-SNAPSHOT.jar!/META-INF/registryFile.jai\" :\n", + "Error in registry file at line number #31\n", + "A descriptor is already registered against the name \"org.geotools.ColorReduction\" under registry mode \"rendered\"\n", + "Error in registry file at line number #32\n", + "A descriptor is already registered against the name \"org.geotools.ColorInversion\" under registry mode \"rendered\"\n" ] } ], "source": [ "config = SedonaContext.builder() .\\\n", " config('spark.jars.packages',\n", - " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n", + " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.5.0,'\n", " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \\\n", " getOrCreate()\n", "\n", @@ -225,7 +236,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "23/07/12 14:17:43 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n" + "23/10/16 17:17:38 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n" ] } ], @@ -308,7 +319,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[('3.0', '2.12', '1.4.1')]\n" + "[('3.0', '2.12', '1.5.0')]\n" ] } ], @@ -355,7 +366,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "23/07/12 14:17:44 WARN JoinQuery: UseIndex is true, but no index exists. Will build index on the fly.\n" + "23/10/16 17:17:39 WARN JoinQuery: UseIndex is true, but no index exists. 
Will build index on the fly.\n" ] }, { @@ -500,7 +511,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0646646608754887811eee12e5516d16", + "model_id": "3886ecefe3884d5f97a469b82a66cea5", "version_major": 2, "version_minor": 0 }, @@ -513,11 +524,8 @@ } ], "source": [ - "df = groupedresult.toPandas()\n", - "gdf = gpd.GeoDataFrame(df, geometry=\"country_geom\").rename(columns={'country_geom':'geometry'})\n", - "\n", - "map = KeplerGl(data={\"AirportCount\": gdf}, config=getConfig())\n", - "map" + "sedona_kepler_map = SedonaKepler.create_map(df=groupedresult, name=\"AirportCount\", config=getConfig())\n", + "sedona_kepler_map" ] }, { diff --git a/binder/Pipfile b/binder/Pipfile index 080c3087d6..25968ae90e 100644 --- a/binder/Pipfile +++ b/binder/Pipfile @@ -16,10 +16,11 @@ shapely="==1.8.4" geopandas="==0.11.1" pyspark="==3.3.2" attrs="*" -apache-sedona="==1.4.1" +apache-sedona="==1.5.0" matplotlib = "*" descartes = "*" keplergl = "==0.3.2" +pydeck = "==0.8.0" ipywidgets = "*" jupyterlab-widgets = "*" ipykernel = "*" diff --git a/examples/flink-sql/pom.xml b/examples/flink-sql/pom.xml index 13e62ab3cb..a08a147096 100644 --- a/examples/flink-sql/pom.xml +++ b/examples/flink-sql/pom.xml @@ -12,7 +12,7 @@ 1.4.0-28.2 compile 2.12 - 1.4.0 + 1.5.0 1.14.3 compile diff --git a/examples/spark-rdd-colocation-mining/build.sbt b/examples/spark-rdd-colocation-mining/build.sbt index e8df972ff3..d380f82d01 100644 --- a/examples/spark-rdd-colocation-mining/build.sbt +++ b/examples/spark-rdd-colocation-mining/build.sbt @@ -39,7 +39,7 @@ val SparkCompatibleVersion = "3.0" val HadoopVersion = "2.7.2" -val SedonaVersion = "1.4.0" +val SedonaVersion = "1.5.0" val ScalaCompatibleVersion = "2.12" diff --git a/examples/spark-sql/build.sbt b/examples/spark-sql/build.sbt index aeb45a6c4b..8ea074656b 100644 --- a/examples/spark-sql/build.sbt +++ b/examples/spark-sql/build.sbt @@ -39,7 +39,7 @@ val SparkCompatibleVersion = "3.0" val HadoopVersion = "2.7.2" -val SedonaVersion = "1.4.0" +val SedonaVersion = "1.5.0" val ScalaCompatibleVersion = "2.12" diff --git a/examples/spark-viz/build.sbt b/examples/spark-viz/build.sbt index bc2260a89a..afc084f514 100644 --- a/examples/spark-viz/build.sbt +++ b/examples/spark-viz/build.sbt @@ -39,7 +39,7 @@ val SparkCompatibleVersion = "3.0" val HadoopVersion = "2.7.2" -val SedonaVersion = "1.4.0" +val SedonaVersion = "1.5.0" val ScalaCompatibleVersion = "2.12"
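
---

For reviewers who want to try the refactor locally, below is a minimal sketch of the pattern the notebooks converge on after this patch: the Sedona 1.5.0 SedonaContext entry point plus the new SedonaKepler wrapper, which replaces the direct keplergl import and the manual toPandas()/GeoDataFrame round-trip. The CSV path and the ST_Point casts mirror binder/data/testpoint.csv as used in ApacheSedonaSQL.ipynb; treat this as an illustration of the updated API, not part of the diff.

    # Minimal sketch (not part of the diff): the Sedona 1.5.0 usage this patch standardizes on.
    from sedona.spark import SedonaContext, SedonaKepler

    config = SedonaContext.builder(). \
        config('spark.jars.packages',
               'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.5.0,'
               'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \
        getOrCreate()
    sedona = SedonaContext.create(config)  # registers ST_* SQL functions and the geometry type

    # Load a lon/lat CSV and build a geometry column, as in ApacheSedonaSQL.ipynb.
    point_df = sedona.read.format("csv"). \
        option("delimiter", ","). \
        option("header", "false"). \
        load("data/testpoint.csv")  # path relative to the binder folder
    point_df.createOrReplaceTempView("pointtable")
    point_gdf = sedona.sql(
        "SELECT ST_Point(CAST(_c0 AS Decimal(24, 20)), CAST(_c1 AS Decimal(24, 20))) AS geom "
        "FROM pointtable")

    # SedonaKepler (new in 1.5.0) accepts the Spark DataFrame directly,
    # so no toPandas()/GeoDataFrame/KeplerGl conversion is needed.
    kepler_map = SedonaKepler.create_map(df=point_gdf, name="Points")
    kepler_map  # displayed inline when run in Jupyter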