Skip to content

Commit

Permalink
generated COGs for the FLDAS netCDF
Browse files Browse the repository at this point in the history
  • Loading branch information
vishal committed Oct 2, 2023
1 parent 10519f1 commit b27c1fc
Showing 1 changed file with 213 additions and 0 deletions.
213 changes: 213 additions & 0 deletions soil_moisture_transformation_cog.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "20da13af-4f48-4337-bd3e-13c15f20a0a5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import s3fs\n",
"import matplotlib.pyplot as plt\n",
"import rioxarray\n",
"import rasterio\n",
"import rio_cogeo.cogeo\n",
"import xarray as xr\n",
"import re\n",
"from datetime import datetime\n",
"import pandas as pd\n",
"import boto3\n",
"import tempfile"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "499f3ed1-7191-4499-854b-c3556bf5b1d5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"session = boto3.Session()\n",
"s3_client = session.client(\"s3\")\n",
"bucket_name = (\"veda-data-store-staging\")\n",
"FOLDER_NAME = \"fldas_anomalies_SoilMoi00_10cm_tavg_cog\"\n",
"\n",
"files_processed = pd.DataFrame(\n",
" columns=[\"file_name\", \"COGs_created\"]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "da869432-3f10-495c-94d9-517e1686f0a6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def get_all_s3_keys(bucket):\n",
" \"\"\"Get a list of all keys in an S3 bucket.\"\"\"\n",
" keys = []\n",
"\n",
" kwargs = {\"Bucket\": bucket, \"Prefix\": \"FLDAS/FLDAS_NOAH01_C_GL_MA.001/\"}\n",
" while True:\n",
" resp = s3_client.list_objects_v2(**kwargs)\n",
" for obj in resp[\"Contents\"]:\n",
" if obj[\"Key\"].endswith(\".nc\"):\n",
" keys.append(obj[\"Key\"])\n",
"\n",
" try:\n",
" kwargs[\"ContinuationToken\"] = resp[\"NextContinuationToken\"]\n",
" except KeyError:\n",
" break\n",
"\n",
" return keys\n",
"\n",
"keys = get_all_s3_keys(\"gesdisc-cumulus-prod-protected\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6a75676d-2f4f-4d4e-98a4-18804c1ba34e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
"# name= 'gesdisc-cumulus-prod-protected/FLDAS/FLDAS_NOAH01_C_GL_MA.001/1997/FLDAS_NOAH01_C_GL_MA.ANOM199712.001.nc'\n",
"var = \"SoilMoi00_10cm_tavg\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7cc3aa3c-dd7d-4a39-a442-c675531ca4db",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198201_19820101.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198202_19820201.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198203_19820301.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198204_19820401.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198205_19820501.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198206_19820601.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198207_19820701.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198208_19820801.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198209_19820901.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198210_19821001.tif\n",
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198211_19821101.tif\n"
]
}
],
"source": [
"for name in keys:\n",
" fs = s3fs.S3FileSystem(anon=False)\n",
" fileobj = fs.open(f\"gesdisc-cumulus-prod-protected/{name}\")\n",
" xds = xr.open_dataset(fileobj, engine=\"h5netcdf\")\n",
" xds = xds.assign_coords(lon=(((xds.lon + 180) % 360) - 180)).sortby(\"lon\")\n",
" for time_increment in range(0, len(xds.time)):\n",
" filename = name.split(\"/\")[-1]\n",
" filename_elements = re.split(\"[_ .]\", filename)\n",
" start_time = [str(pd.to_datetime(xds.time.values[0]).year), str(pd.to_datetime(xds.time.values[0]).month), str(pd.to_datetime(xds.time.values[0]).day)]\n",
" data = getattr(xds.isel(time=time_increment), var)\n",
" data = data.isel(lat=slice(None, None, -1))\n",
" data.rio.set_spatial_dims(\"lon\", \"lat\", inplace=True)\n",
" data.rio.write_crs(\"epsg:4326\", inplace=True)\n",
"\n",
" # # insert date of generated COG into filename\n",
" filename_elements.pop()\n",
" filename_elements[-1] = pd.to_datetime(xds.time.values[0]).strftime(\"%Y%m%d\")\n",
" filename_elements.insert(2, var)\n",
" cog_filename = \"_\".join(filename_elements)\n",
" # # add extension\n",
" cog_filename = f\"{cog_filename}.tif\"\n",
"\n",
" with tempfile.NamedTemporaryFile() as temp_file:\n",
" data.rio.to_raster(\n",
" temp_file.name,\n",
" driver=\"COG\",\n",
" )\n",
" s3_client.upload_file(\n",
" Filename=temp_file.name,\n",
" Bucket=bucket_name,\n",
" Key=f\"{FOLDER_NAME}/{cog_filename}\",\n",
" )\n",
"\n",
" files_processed = files_processed._append(\n",
" {\"file_name\": name, \"COGs_created\": cog_filename},\n",
" ignore_index=True,\n",
" )\n",
"\n",
" print(f\"Generated and saved COG: {cog_filename}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14e06cc2-bf22-4703-b657-2d5b680045a4",
"metadata": {},
"outputs": [],
"source": [
"with tempfile.NamedTemporaryFile(mode=\"w+\") as fp:\n",
" json.dump(xds.attrs, fp)\n",
" json.dump({\"data_dimensions\": dict(xds.dims)}, fp)\n",
" json.dump({\"data_variables\": list(xds.data_vars)}, fp)\n",
" fp.flush()\n",
"\n",
" s3_client.upload_file(\n",
" Filename=fp.name,\n",
" Bucket=bucket_name,\n",
" Key=f\"{FOLDER_NAME}/metadata.json\",\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1af6cb5-6209-4f56-9695-b6c4bd9f33eb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"files_processed.to_csv(\n",
" f\"s3://{bucket_name}/{FOLDER_NAME}/files_converted.csv\",\n",
")\n",
"print(\"Done generating COGs\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit b27c1fc

Please sign in to comment.