diff --git a/binder/ApacheSedonaRaster.ipynb b/binder/ApacheSedonaRaster.ipynb
index 23f3a1cae5..cde6598223 100644
--- a/binder/ApacheSedonaRaster.ipynb
+++ b/binder/ApacheSedonaRaster.ipynb
@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "markdown",
+ "id": "fbefc0bd-731b-43e4-b271-6cb4cba5c256",
"metadata": {},
"source": [
"```\n",
@@ -22,38 +23,40 @@
"```"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "b443d3d3-1667-4770-b57c-7f79a3ea5d42",
+ "metadata": {},
+ "source": [
+ "## Import Sedona"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
+ "id": "328d0b74-1efd-468c-bc96-a469965df60b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "from IPython.display import display, HTML\n",
- "from pyspark.sql import SparkSession\n",
- "from pyspark import StorageLevel\n",
- "import pandas as pd\n",
- "from pyspark.sql.types import StructType, StructField,StringType, LongType, IntegerType, DoubleType, ArrayType\n",
- "from pyspark.sql.functions import regexp_replace\n",
- "from pyspark.sql.functions import col, split, expr\n",
- "from pyspark.sql.functions import udf, lit\n",
"from sedona.spark import *\n",
- "from pyspark.sql.functions import col, split, expr\n",
- "from pyspark.sql.functions import udf, lit\n",
- "import os\n"
+ "from IPython.display import display, HTML"
]
},
{
"cell_type": "markdown",
+ "id": "f28c8117-069c-431c-ac58-6ff258b1196d",
"metadata": {},
"source": [
- "# Create Spark Session for application"
+ "## Create a Sedona Context object.\n",
+ "If you already have a Spark session available, simply use `SedonaContext.create(spark)`."
]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "e3495923-7fb4-4a6e-b62e-a4eeb9c2b306",
"metadata": {
"tags": []
},
@@ -61,522 +64,629 @@
"source": [
"config = SedonaContext.builder() .\\\n",
" config('spark.jars.packages',\n",
- " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.5.0,'\n",
+ " 'org.apache.sedona:sedona-spark-shaded-3.4_2.12:1.5.0,'\n",
" 'org.datasyslab:geotools-wrapper:1.5.0-28.2'). \\\n",
" getOrCreate()\n",
"\n",
"sedona = SedonaContext.create(config)\n",
"\n",
- "sc = sedona.sparkContext\n"
+ "sc = sedona.sparkContext"
]
},
{
"cell_type": "markdown",
+ "id": "91d4e6ae-eeb6-46ca-89fd-8f82e6056924",
"metadata": {},
"source": [
- "# Geotiff Loader \n",
- "\n",
- "1. Loader takes as input a path to directory which contains geotiff files or a path to particular geotiff file\n",
- "2. Loader will read geotiff image in a struct named image which contains multiple fields as shown in the schema below which can be extracted using spark SQL"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Path to directory of geotiff images \n",
- "DATA_DIR = \"./data/raster/\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "scrolled": true,
- "tags": []
- },
- "outputs": [],
- "source": [
- "df = sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\", \"EPSG:4326\").option(\"disableErrorInCRS\", False).load(DATA_DIR)\n",
- "df.printSchema()"
+ "## Read GeoTiff files"
]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "58c05200-27f7-46ce-b2c5-4c1dc058c96e",
"metadata": {},
"outputs": [],
"source": [
- "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as height\", \"image.width as width\", \"image.data as data\", \"image.nBands as bands\")\n",
- "df.show(5)"
+ "geotiff_df = sedona.read.format(\"binaryFile\").load(\"data/raster/test5.tiff\")\n",
+ "geotiff_df.show(2)\n",
+ "geotiff_df.createOrReplaceTempView(\"binary_raster\")"
]
},
{
"cell_type": "markdown",
+ "id": "db66242c-d0b3-4348-b2ef-4344d266cb4c",
"metadata": {},
"source": [
- "# Extract a particular band from geotiff dataframe using RS_GetBand()\n"
+ "## Create raster columns from the read binary data"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "36eb9e36-cbcb-472a-96c6-79d49305cf66",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_GetBand() will fetch a particular band from given data array which is the concatenation of all the bands'''\n",
- "\n",
- "df = df.selectExpr(\"Geom\",\"RS_GetBand(data, 1,bands) as Band1\",\"RS_GetBand(data, 2,bands) as Band2\",\"RS_GetBand(data, 3,bands) as Band3\", \"RS_GetBand(data, 4,bands) as Band4\")\n",
- "df.createOrReplaceTempView(\"allbands\")\n",
- "df.show(5)"
+ "raster_df = sedona.sql(\"SELECT RS_FromGeoTiff(content) as raster from binary_raster\")\n",
+ "raster_df.show(2)\n",
+ "raster_df.createOrReplaceTempView(\"raster_table\")"
]
},
{
"cell_type": "markdown",
+ "id": "3932eb9e-aeb6-4abe-a986-f26a11eb1fe3",
"metadata": {},
"source": [
- "# Map Algebra operations on band values"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''RS_NormalizedDifference can be used to calculate NDVI for a particular geotiff image since it uses same computational formula as ndvi'''\n",
- "\n",
- "NomalizedDifference = df.selectExpr(\"RS_NormalizedDifference(Band1, Band2) as normDiff\")\n",
- "NomalizedDifference.show(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''RS_Mean() can used to calculate mean of piel values in a particular spatial band'''\n",
- "meanDF = df.selectExpr(\"RS_Mean(Band1) as mean\")\n",
- "meanDF.show(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\"\"\" RS_Mode() is used to calculate mode in an array of pixels and returns a array of double with size 1 in case of unique mode\"\"\"\n",
- "modeDF = df.selectExpr(\"RS_Mode(Band1) as mode\")\n",
- "modeDF.show(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''RS_GreaterThan() is used to mask all the values with 1 which are greater than a particular threshold'''\n",
- "greaterthanDF = sedona.sql(\"Select RS_GreaterThan(Band1,1000.0) as greaterthan from allbands\")\n",
- "greaterthanDF.show()"
+ "## Operate on rasters using Sedona\n",
+ "Once a raster column is created, you are free to use the entire catalog of Sedona's [raster functions](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/). The rest of this notebook walks through a few examples."
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "1b47699e-8ce4-4859-ace9-d12ea1f4d0b9",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_GreaterThanEqual() is used to mask all the values with 1 which are greater than a particular threshold'''\n",
- "\n",
- "greaterthanEqualDF = sedona.sql(\"Select RS_GreaterThanEqual(Band1,360.0) as greaterthanEqual from allbands\")\n",
- "greaterthanEqualDF.show()"
+ "### Access raster metadata\n",
+ "[RS_MetaData](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_metadata) can be used to view the loaded raster's metadata (orientation and georeferencing attributes)."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''RS_LessThan() is used to mask all the values with 1 which are less than a particular threshold'''\n",
- "lessthanDF = sedona.sql(\"Select RS_LessThan(Band1,1000.0) as lessthan from allbands\")\n",
- "lessthanDF.show()"
+ "execution_count": 42,
+ "id": "6d635263-9e8b-4f74-9b91-d360d196b966",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[-180.0, 90.0, 1440.0, 720.0, 0.25, -0.25, 0.0, 0.0, 4326.0, 1.0]"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raster_metadata = sedona.sql(\"SELECT RS_MetaData(raster) as metadata from raster_table\")\n",
+ "metadata = raster_metadata.first()[0]\n",
+ "raster_srid = metadata[8]  # index 8 of the RS_MetaData array is the raster's SRID (returned as a double)\n",
+ "metadata"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "713bc8db-3143-4a79-abb5-08ad81f9393a",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_LessThanEqual() is used to mask all the values with 1 which are less than equal to a particular threshold'''\n",
- "lessthanEqualDF = sedona.sql(\"Select RS_LessThanEqual(Band1,2890.0) as lessthanequal from allbands\")\n",
- "lessthanEqualDF.show()"
+ "### Visualize rasters\n",
+ "Sedona 1.5.0 provides [multiple ways to visualize rasters](https://sedona.apache.org/1.5.0/api/sql/Raster-visualizer/). Throughout this notebook, [RS_AsImage](https://sedona.apache.org/1.5.0/api/sql/Raster-visualizer/#rs_asimage) is used to visualize the rasters and any changes made to them."
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "d5f615f4-a3d6-407c-aea9-58891c1e55e3",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_Add() can add two spatial bands together'''\n",
- "sumDF = df.selectExpr(\"RS_Add(Band1, Band2) as sumOfBand\")\n",
- "sumDF.show(5)"
+ "# Simple wrapper that renders a DataFrame as an HTML table in a Jupyter notebook environment\n",
+ "class SedonaUtils:\n",
+ "    @classmethod\n",
+ "    def display_image(cls, df):  # df: Spark DataFrame whose cells contain HTML, e.g. RS_AsImage output\n",
+ "        display(HTML(df.toPandas().to_html(escape=False)))  # escape=False keeps the image markup as live HTML"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''RS_Subtract() can subtract two spatial bands together'''\n",
- "subtractDF = df.selectExpr(\"RS_Subtract(Band1, Band2) as diffOfBand\")\n",
- "subtractDF.show(5)"
+ "execution_count": 43,
+ "id": "7fad137f-331c-4c2f-905d-dbc42cff11b6",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rs_asimage(raster, 500) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "SedonaUtils.display_image(raster_df.selectExpr(\"RS_AsImage(raster, 500)\"))"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "cef34e54-ac3c-48f3-836c-5a5385b79481",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_Multiply() can multiple two bands together'''\n",
- "multiplyDF = df.selectExpr(\"RS_Multiply(Band1, Band2) as productOfBand\")\n",
- "multiplyDF.show(5)"
+ "### Join based on raster predicates\n",
+ "Sedona 1.5.0 now supports join predicates between raster and geometry columns.\n",
+ "\n",
+ "Below is a simple example that carves a small rectangle out of the existing raster and joins it back to the original raster using `RS_Intersects`."
]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "6442349c-be2e-4609-a16e-b856745ddf46",
"metadata": {},
"outputs": [],
"source": [
- "'''RS_Divide() can divide two bands together'''\n",
- "divideDF = df.selectExpr(\"RS_Divide(Band1, Band2) as divisionOfBand\")\n",
- "divideDF.show(5)"
+ "(width, height) = sedona.sql(\"SELECT RS_Width(raster) as width, RS_Height(raster) as height from raster_table\").first()\n",
+ "center_col, center_row = width // 2, height // 2  # integer grid coordinates; '/' would interpolate floats such as 720.0\n",
+ "def grid_to_world(col_x, row_y):\n",
+ "    \"\"\"Return the world (x, y) coordinates of grid cell (col_x, row_y) of the raster in raster_table.\"\"\"\n",
+ "    return sedona.sql(f\"SELECT RS_RasterToWorldCoordX(raster, {col_x}, {row_y}) as pX, RS_RasterToWorldCoordY(raster, {col_x}, {row_y}) as pY from raster_table\").first()\n",
+ "(p1X, p1Y) = grid_to_world(center_col, center_row)\n",
+ "(p2X, p2Y) = grid_to_world(center_col + 2, center_row)\n",
+ "(p3X, p3Y) = grid_to_world(center_col, center_row + 2)\n",
+ "(p4X, p4Y) = grid_to_world(center_col + 2, center_row + 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "ed399ee8-42b7-488b-8141-320c2bf6d9c3",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_MultiplyFactor() will multiply a factor to a spatial band'''\n",
- "mulfacDF = df.selectExpr(\"RS_MultiplyFactor(Band2, 2) as target\")\n",
- "mulfacDF.show(5)"
+ "geom_wkt = f\"SRID={int(raster_srid)};POLYGON (({p1X} {p1Y}, {p2X} {p2Y}, {p4X} {p4Y}, {p3X} {p3Y}, {p1X} {p1Y}))\"  # ring p1 -> p2 -> p4 -> p3 walks the rectangle's perimeter; p1, p2, p3, p4 would self-intersect"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "cb8aa25f-4706-4ee7-9994-3da474c3eb2c",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_BitwiseAND() will return AND between two values of Bands'''\n",
- "bitwiseAND = df.selectExpr(\"RS_BitwiseAND(Band1, Band2) as AND\")\n",
- "bitwiseAND.show(5)"
+ "geom_df = sedona.sql(f\"SELECT ST_GeomFromEWKT('{geom_wkt}') as geom\")\n",
+ "geom_df.createOrReplaceTempView(\"geom_table\")"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "6461c14c-d479-4c64-8f8f-8c21903dedf5",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_BitwiseOR() will return OR between two values of Bands'''\n",
- "bitwiseOR = df.selectExpr(\"RS_BitwiseOR(Band1, Band2) as OR\")\n",
- "bitwiseOR.show(5)"
+ "joined_df = sedona.sql(\"SELECT g.geom from raster_table r, geom_table g where RS_Intersects(r.raster, g.geom)\")\n",
+ "joined_df.show()"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "9be5e7db-17e5-4bab-b7a3-8ee278374355",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_Count() will calculate the total number of occurrence of a target value'''\n",
- "countDF = df.selectExpr(\"RS_Count(RS_GreaterThan(Band1,1000.0), 1.0) as count\")\n",
- "countDF.show(5)"
+ "### Interoperability between raster and vector data types\n",
+ "Sedona allows for conversions from raster to geometry and vice-versa. "
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "cc0bfd1c-7117-444a-8189-881da19846c9",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_Modulo() will calculate the modulus of band value with respect to a given number'''\n",
- "moduloDF = df.selectExpr(\"RS_Modulo(Band1, 21.0) as modulo \")\n",
- "moduloDF.show(5)"
+ "### Convert a raster to vector using convex hull\n",
+ "A convex hull geometry can be created out of a raster using [RS_ConvexHull](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_convexhull)\n",
+ "\n",
+ "Additionally, if a raster has a noDataValue specified and you wish to tighten the convex hull to exclude noDataValue boundaries, [RS_MinConvexHull](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_minconvexhull) can be used."
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "22b9dd16-f720-4fa4-acb9-b80c34702a93",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_SquareRoot() will calculate calculate square root of all the band values up to two decimal places'''\n",
- "rootDF = df.selectExpr(\"RS_SquareRoot(Band1) as root\")\n",
- "rootDF.show(5)\n"
+ "raster_convex_hull = sedona.sql(\"SELECT RS_ConvexHull(raster) as convex_hull from raster_table\")\n",
+ "raster_min_convex_hull = sedona.sql(\"SELECT RS_MinConvexHull(raster) as min_convex_hull from raster_table\")\n",
+ "raster_convex_hull.show(truncate=False)\n",
+ "raster_min_convex_hull.show(truncate=False)"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "8ca7e862-45c9-4559-a2e1-4e044d6b5c84",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_LogicalDifference() will return value from band1 if value at that particular location is not equal tp band1 else it will return 0'''\n",
- "logDiff = df.selectExpr(\"RS_LogicalDifference(Band1, Band2) as loggDifference\")\n",
- "logDiff.show(5)"
+ "### Convert a geometry to raster (Rasterize a geometry)\n",
+ "A geometry can be converted to a raster using [RS_AsRaster](https://sedona.apache.org/1.5.0/api/sql/Raster-writer/#rs_asraster)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "8bc32fc6-d418-4e7c-8631-57e2c623f14c",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''RS_LogicalOver() will iterate over two bands and return value of first band if it is not equal to 0 else it will return value from later band'''\n",
- "logOver = df.selectExpr(\"RS_LogicalOver(Band3, Band2) as logicalOver\")\n",
- "logOver.show(5)"
+ "rasterized_geom_df = sedona.sql(\"SELECT RS_AsRaster(ST_GeomFromWKT('POLYGON((150 150, 220 260, 190 300, 300 220, 150 150))'), r.raster, 'b', 230) as rasterized_geom from raster_table r\")\n",
+ "rasterized_geom_df.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "a7eecae9-3763-405f-a22e-c7d77ff703b0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "23/10/31 15:21:06 WARN VectorToRasterProcess: coercing double feature values to float raster values\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rasterized_geom | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "SedonaUtils.display_image(rasterized_geom_df.selectExpr(\"RS_AsImage(rasterized_geom, 250) as rasterized_geom\"))"
]
},
{
"cell_type": "markdown",
+ "id": "df954a81-5004-40f7-b80e-795f8569757c",
"metadata": {},
"source": [
- "# Visualising Geotiff Images\n",
+ "### Perform Map Algebra operations\n",
+ "Sedona provides two ways to perform [Map Algebra](https://sedona.apache.org/1.5.0/api/sql/Raster-map-algebra/) on rasters:\n",
+ "1. Using RS_MapAlgebra (preferred for simpler algebraic functions)\n",
+ "2. Using RS_BandAsArray and array-based map algebra functions such as RS_Add and RS_Multiply (useful for complex algebraic functions that mutate each grid value differently)\n",
"\n",
- "1. Normalize the bands in range [0-255] if values are greater than 255\n",
- "2. Process image using RS_Base64() which converts in into a base64 string\n",
- "3. Embed results of RS_Base64() in RS_HTML() to embed into IPython notebook\n",
- "4. Process results of RS_HTML() as below:"
+ "The following example illustrates how RS_MapAlgebra can be used. \n",
+ "This example uses a Jiffle script to invert the colors of the rasterized geometry illustrated above."
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "53abef31-b1aa-42ef-8eb0-f1d9227e3893",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''Plotting images as a dataframe using geotiff Dataframe.'''\n",
- "\n",
- "df = sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\", \"EPSG:4326\").load(DATA_DIR)\n",
- "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as height\", \"image.width as width\", \"image.data as data\", \"image.nBands as bands\")\n",
- "\n",
- "df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\", \"height\", \"width\", \"bands\", \"Geom\")\n",
- "df_base64 = df.selectExpr(\"Geom\", \"RS_Base64(height,width,RS_Normalize(targetBand), RS_Array(height*width,0.0), RS_Array(height*width, 0.0)) as red\",\"RS_Base64(height,width,RS_Array(height*width, 0.0), RS_Normalize(targetBand), RS_Array(height*width, 0.0)) as green\", \"RS_Base64(height,width,RS_Array(height*width, 0.0), RS_Array(height*width, 0.0), RS_Normalize(targetBand)) as blue\",\"RS_Base64(height,width,RS_Normalize(targetBand), RS_Normalize(targetBand),RS_Normalize(targetBand)) as RGB\" )\n",
- "df_HTML = df_base64.selectExpr(\"Geom\",\"RS_HTML(red) as RedBand\",\"RS_HTML(blue) as BlueBand\",\"RS_HTML(green) as GreenBand\", \"RS_HTML(RGB) as CombinedBand\")\n",
- "df_HTML.show(5)"
+ "raster_white_bg = rasterized_geom_df.selectExpr(\"RS_MapAlgebra(rasterized_geom, NULL, 'out[0] = rast[0] == 0 ? 230 : 0;') as raster\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "display(HTML(df_HTML.limit(2).toPandas().to_html(escape=False)))"
+ "execution_count": 45,
+ "id": "75f06a1b-1ab6-478b-a50e-b621a10d6d8b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "23/10/31 15:21:09 WARN VectorToRasterProcess: coercing double feature values to float raster values\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " resampled_raster | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "SedonaUtils.display_image(raster_white_bg.selectExpr(\"RS_AsImage(raster, 250) as inverted_raster\"))  # label the column for what it shows: the color-inverted raster, not a resampled one"
]
},
{
"cell_type": "markdown",
+ "id": "fde725ec-2941-4b6e-9b52-5fd35cea6c01",
"metadata": {},
"source": [
- "# Writing GeoTiff Images"
+ "### Resample a raster.\n",
+ "Sedona 1.5.0 supports resampling a raster to different height/width or scale. It also supports changing the pivot of the raster.\n",
+ "\n",
+ "Refer to [RS_Resample](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_resample) documentation for more details.\n",
+ "\n",
+ "This simple example changes the resolution of the loaded raster to 1000*1000"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "c8fdb8c7-52d5-49fa-83f2-44a9438bd509",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''Writing GeoTiff DataFrames as GeoTiff Images'''\n",
- "\n",
- "df = sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\", \"EPSG:4326\").load(DATA_DIR)\n",
- "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as height\", \"image.width as width\", \"image.data as data\", \"image.nBands as bands\")\n",
- "\n",
- "SAVE_PATH = \"./data/raster-written/\"\n",
- "df.write.mode(\"overwrite\").format(\"geotiff\").option(\"writeToCRS\", \"EPSG:4326\").option(\"fieldGeometry\", \"Geom\").option(\"fieldNBands\", \"bands\").save(SAVE_PATH)"
+ "resampled_raster_df = sedona.sql(\"SELECT RS_Resample(raster, 1000, 1000, false, 'NearestNeighbor') as resampled_raster from raster_table\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 47,
+ "id": "b14820dc-ed04-41cd-9220-73a5179f52df",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " resampled_raster | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "SedonaUtils.display_image(resampled_raster_df.selectExpr(\"RS_AsImage(resampled_raster, 500) as resampled_raster\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "bee36339-d0c1-469d-9354-980a23f24401",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+------------------------------------------------------------------+\n",
+ "|resampled_raster_metadata |\n",
+ "+------------------------------------------------------------------+\n",
+ "|[-180.0, 90.0, 1000.0, 1000.0, 0.36, -0.18, 0.0, 0.0, 4326.0, 1.0]|\n",
+ "+------------------------------------------------------------------+\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
- "'''Writing GeoTiff Images in a Single Partition'''\n",
- "df.coalesce(1).write.mode(\"overwrite\").format(\"geotiff\").option(\"writeToCRS\", \"EPSG:4326\").option(\"fieldGeometry\", \"Geom\").option(\"fieldNBands\", \"bands\").save(SAVE_PATH)"
+ "resampled_raster_df.selectExpr(\"RS_MetaData(resampled_raster) as resampled_raster_metadata\").show(truncate=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "2b0aa64e-4a02-4c85-9ba5-6459d2002f8a",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''Find the Partition of the Written GeoTiff Images.\n",
- " If you did not write with coalesce(1), change the below code to adjust the writtenPath'''\n",
- "writtenPath = SAVE_PATH\n",
- "dirList = os.listdir(writtenPath)\n",
- "for item in dirList:\n",
- " if os.path.isdir(writtenPath + \"/\" + item):\n",
- " writtenPath += \"/\" + item\n",
- " break"
+ "# Load another raster (as raw GeoTiff bytes) for some more examples; the raster column itself is built in the next cell\n",
+ "elevation_geotiff_df = sedona.read.format('binaryFile').load('data/raster/test1.tiff')\n",
+ "elevation_geotiff_df.createOrReplaceTempView(\"elevation_raster_binary\")"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "id": "623123ac-98bc-4d51-828d-9d874cc6f471",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "'''Load and Visualize Written GeoTiff Image.'''\n",
- "\n",
- "df = sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\", \"EPSG:4326\").load(writtenPath)\n",
- "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as height\", \"image.width as width\", \"image.data as data\", \"image.nBands as bands\")\n",
- "\n",
- "df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\", \"height\", \"width\", \"bands\", \"Geom\")\n",
- "df_base64 = df.selectExpr(\"Geom\", \"RS_Base64(height,width,RS_Normalize(targetBand), RS_Array(height*width,0.0), RS_Array(height*width, 0.0)) as red\",\"RS_Base64(height,width,RS_Array(height*width, 0.0), RS_Normalize(targetBand), RS_Array(height*width, 0.0)) as green\", \"RS_Base64(height,width,RS_Array(height*width, 0.0), RS_Array(height*width, 0.0), RS_Normalize(targetBand)) as blue\",\"RS_Base64(height,width,RS_Normalize(targetBand), RS_Normalize(targetBand),RS_Normalize(targetBand)) as RGB\" )\n",
- "df_HTML = df_base64.selectExpr(\"Geom\",\"RS_HTML(red) as RedBand\",\"RS_HTML(blue) as BlueBand\",\"RS_HTML(green) as GreenBand\", \"RS_HTML(RGB) as CombinedBand\")\n",
- "display(HTML(df_HTML.limit(2).toPandas().to_html(escape=False)))"
+ "elevation_raster_df = sedona.sql(\"SELECT RS_FromGeoTiff(content) as raster from elevation_raster_binary\")\n",
+ "elevation_raster_df.createOrReplaceTempView(\"elevation_raster\")"
]
},
{
"cell_type": "markdown",
+ "id": "2a6afdf3-e774-432f-96a3-96a4ca8249c7",
"metadata": {},
"source": [
- "# Transformation of GeoTiff Images"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''First load GeoTiff Images'''\n",
- "df = sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\", \"EPSG:4326\").option(\"disableErrorInCRS\", False).load(DATA_DIR)\n",
- "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.geometry) as geom\", \"image.height as height\", \"image.width as width\", \"image.data as data\", \"image.nBands as bands\")\n",
- "df.show(5)"
+ "### Access individual values from rasters\n",
+ "Sedona provides [RS_Value](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_value) and [RS_Values](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_values) that allow accessing raster values at given geometrical point(s).\n",
+ "\n",
+ "The following example extracts raster values at specific geographical points."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
+ "id": "ffe589e1-50b7-431a-ba84-b2c297b77f65",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------+\n",
+ "| raster_values|\n",
+ "+--------------+\n",
+ "|[115.0, 148.0]|\n",
+ "+--------------+\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
- "# First extract the bands for which normalized difference index needs to be calculated\n",
- "df = df.selectExpr(\"origin\", \"geom\", \"width\", \"height\", \"data\", \"bands\", \"RS_GetBand(data, 1, bands) as band1\", \"RS_GetBand(data, 2, bands) as band2\")\n",
- "# Get the normalized difference index between the extracted bands\n",
- "df = df.selectExpr(\"origin\", \"geom\", \"width\", \"height\", \"data\", \"bands\", \"RS_NormalizedDifference(band2, band1) as normalizedDifference\")\n",
- "df.show(5)"
+ "point_wkt_1 = 'SRID=3857;POINT (-13095600.809482181 4021100.7487925636)'\n",
+ "point_wkt_2 = 'SRID=3857;POINT (-13095500.809482181 4021000.7487925636)'\n",
+ "point_df = sedona.sql(f\"SELECT ST_GeomFromEWKT('{point_wkt_1}') as point_1, ST_GeomFromEWKT('{point_wkt_2}') as point_2\")\n",
+ "point_df.createOrReplaceTempView(\"point_table\")\n",
+ "test_df = sedona.sql(\"SELECT RS_Values(raster, Array(point_1, point_2)) as raster_values from elevation_raster, point_table\")\n",
+ "test_df.show()"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
+ "id": "8643ed69-9128-49a9-80e7-f9115694695f",
"metadata": {},
- "outputs": [],
"source": [
- "'''RS_Append() takes the data array containing bands, a new band to be appended, and number of total bands in the data array.\n",
- " It appends the new band to the end of the data array and returns the appended data'''\n",
- "\n",
- "df = df.selectExpr(\"origin\", \"geom\", \"RS_Append(data, normalizedDifference, bands) as data_edited\", \"height\", \"width\", \"bands\").drop(\"data\")\n",
- "df = df.withColumn(\"nBand_edited\", col(\"bands\") + 1).drop(\"bands\")\n",
- "df.show()"
+ "### Extract individual bands from rasters\n",
+ "[RS_BandAsArray](https://sedona.apache.org/1.5.0/api/sql/Raster-operators/#rs_bandasarray) can be used to extract entire band values from a given raster"
]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "48f56157-ab07-456e-83fe-75d23f5bb28e",
"metadata": {},
"outputs": [],
"source": [
- "'''Writing GeoTiff DataFrames as GeoTiff Images'''\n",
- "SAVE_PATH = \"./data/raster-written/\"\n",
- "df.coalesce(1).write.mode(\"overwrite\").format(\"geotiff\").option(\"writeToCRS\", \"EPSG:4326\").option(\"fieldGeometry\", \"geom\").option(\"fieldNBands\", \"nBand_edited\").option(\"fieldData\", \"data_edited\").save(SAVE_PATH)"
+ "band = elevation_raster_df.selectExpr(\"RS_BandAsArray(raster, 1)\").first()[0]\n",
+ "print(band)  # Print the entire band as a single flat array"
]
},
{
"cell_type": "markdown",
+ "id": "e586b0e5-935a-47fa-8ebf-b63ddd9a48a8",
"metadata": {},
"source": [
- "# User can also create some UDF manually to manipulate Geotiff dataframes"
+ "### Visualize Raster MBRs"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 50,
+ "id": "2a2c7086-9588-48a7-a710-c10b8c5e4875",
"metadata": {},
"outputs": [],
"source": [
- "'''Sample UDF calculates sum of all the values in a band which are greater than 1000.0'''\n",
- "\n",
- "def SumOfValues(band):\n",
- " total = 0.0\n",
- " for num in band:\n",
- " if num>1000.0:\n",
- " total+=1\n",
- " return total\n",
- "\n",
- "df = sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\", \"EPSG:4326\").load(DATA_DIR)\n",
- "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as height\", \"image.width as width\", \"image.data as data\", \"image.nBands as bands\")\n",
- "df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\", \"height\", \"width\", \"bands\", \"Geom\")\n",
- " \n",
- "calculateSum = udf(SumOfValues, DoubleType())\n",
- "sedona.udf.register(\"RS_Sum\", calculateSum)\n",
- "\n",
- "sumDF = df.selectExpr(\"RS_Sum(targetband) as sum\")\n",
- "sumDF.show()"
+ "# Convert raster to its convex hull and transform it to EPSG:4326 to be able to visualize\n",
+ "raster_mbr_df = elevation_raster_df.selectExpr(\"ST_Transform(RS_ConvexHull(raster), 'EPSG:3857', 'EPSG:4326') as raster_mbr\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "'''Sample UDF to visualize a particular region of a GeoTiff image'''\n",
- "\n",
- "def generatemask(band, width,height):\n",
- " for (i,val) in enumerate(band):\n",
- " if (i%width>=12 and i%width<26) and (i%height>=12 and i%height<26):\n",
- " band[i] = 255.0\n",
- " else:\n",
- " band[i] = 0.0\n",
- " return band\n",
- "\n",
- "maskValues = udf(generatemask, ArrayType(DoubleType()))\n",
- "sedona.udf.register(\"RS_MaskValues\", maskValues)\n",
- "\n",
- "\n",
- "df_base64 = df.selectExpr(\"Geom\", \"RS_Base64(height,width,RS_Normalize(targetband), RS_Array(height*width,0.0), RS_Array(height*width, 0.0), RS_MaskValues(targetband,width,height)) as region\" )\n",
- "df_HTML = df_base64.selectExpr(\"Geom\",\"RS_HTML(region) as selectedregion\")\n",
- "display(HTML(df_HTML.limit(2).toPandas().to_html(escape=False)))\n"
+ "execution_count": 51,
+ "id": "6f39b3db-a0b1-4842-a5ca-b5a5850f3ea7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "User Guide: https://docs.kepler.gl/docs/keplergl-jupyter\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8ec0e5aca1954d36abe75cbe8703ba57",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "KeplerGl(data={'RasterMBR': {'index': [0], 'columns': ['geometry'], 'data': [['POLYGON ((-117.6417329630247508…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sedona_kepler_map_elevation = SedonaKepler.create_map(df=raster_mbr_df, name='RasterMBR')\n",
+ "sedona_kepler_map_elevation"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -599,5 +709,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
}