-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
vishal
committed
Oct 2, 2023
1 parent
10519f1
commit b27c1fc
Showing
1 changed file
with
213 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "20da13af-4f48-4337-bd3e-13c15f20a0a5", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
    "import json\n", | ||
    "import re\n", | ||
    "import tempfile\n", | ||
    "from datetime import datetime\n", | ||
    "\n", | ||
    "import boto3\n", | ||
    "import matplotlib.pyplot as plt\n", | ||
    "import pandas as pd\n", | ||
    "import rasterio\n", | ||
    "import rio_cogeo.cogeo\n", | ||
    "import rioxarray\n", | ||
    "import s3fs\n", | ||
    "import xarray as xr" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "499f3ed1-7191-4499-854b-c3556bf5b1d5", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"session = boto3.Session()\n", | ||
"s3_client = session.client(\"s3\")\n", | ||
"bucket_name = (\"veda-data-store-staging\")\n", | ||
"FOLDER_NAME = \"fldas_anomalies_SoilMoi00_10cm_tavg_cog\"\n", | ||
"\n", | ||
"files_processed = pd.DataFrame(\n", | ||
" columns=[\"file_name\", \"COGs_created\"]\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "da869432-3f10-495c-94d9-517e1686f0a6", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_all_s3_keys(bucket):\n", | ||
" \"\"\"Get a list of all keys in an S3 bucket.\"\"\"\n", | ||
" keys = []\n", | ||
"\n", | ||
" kwargs = {\"Bucket\": bucket, \"Prefix\": \"FLDAS/FLDAS_NOAH01_C_GL_MA.001/\"}\n", | ||
" while True:\n", | ||
" resp = s3_client.list_objects_v2(**kwargs)\n", | ||
" for obj in resp[\"Contents\"]:\n", | ||
" if obj[\"Key\"].endswith(\".nc\"):\n", | ||
" keys.append(obj[\"Key\"])\n", | ||
"\n", | ||
" try:\n", | ||
" kwargs[\"ContinuationToken\"] = resp[\"NextContinuationToken\"]\n", | ||
" except KeyError:\n", | ||
" break\n", | ||
"\n", | ||
" return keys\n", | ||
"\n", | ||
"keys = get_all_s3_keys(\"gesdisc-cumulus-prod-protected\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "6a75676d-2f4f-4d4e-98a4-18804c1ba34e", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
    "\n", | ||
    "# Example of a full source path (bucket + key), kept for reference:\n", | ||
    "# 'gesdisc-cumulus-prod-protected/FLDAS/FLDAS_NOAH01_C_GL_MA.001/1997/FLDAS_NOAH01_C_GL_MA.ANOM199712.001.nc'\n", | ||
    "# Name of the dataset variable that is extracted and written out as a COG.\n", | ||
    "var = \"SoilMoi00_10cm_tavg\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7cc3aa3c-dd7d-4a39-a442-c675531ca4db", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198201_19820101.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198202_19820201.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198203_19820301.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198204_19820401.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198205_19820501.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198206_19820601.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198207_19820701.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198208_19820801.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198209_19820901.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198210_19821001.tif\n", | ||
"Generated and saved COG: FLDAS_NOAH01_SoilMoi00_10cm_tavg_C_GL_MA_ANOM198211_19821101.tif\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for name in keys:\n", | ||
" fs = s3fs.S3FileSystem(anon=False)\n", | ||
" fileobj = fs.open(f\"gesdisc-cumulus-prod-protected/{name}\")\n", | ||
" xds = xr.open_dataset(fileobj, engine=\"h5netcdf\")\n", | ||
" xds = xds.assign_coords(lon=(((xds.lon + 180) % 360) - 180)).sortby(\"lon\")\n", | ||
" for time_increment in range(0, len(xds.time)):\n", | ||
" filename = name.split(\"/\")[-1]\n", | ||
" filename_elements = re.split(\"[_ .]\", filename)\n", | ||
" start_time = [str(pd.to_datetime(xds.time.values[0]).year), str(pd.to_datetime(xds.time.values[0]).month), str(pd.to_datetime(xds.time.values[0]).day)]\n", | ||
" data = getattr(xds.isel(time=time_increment), var)\n", | ||
" data = data.isel(lat=slice(None, None, -1))\n", | ||
" data.rio.set_spatial_dims(\"lon\", \"lat\", inplace=True)\n", | ||
" data.rio.write_crs(\"epsg:4326\", inplace=True)\n", | ||
"\n", | ||
" # # insert date of generated COG into filename\n", | ||
" filename_elements.pop()\n", | ||
" filename_elements[-1] = pd.to_datetime(xds.time.values[0]).strftime(\"%Y%m%d\")\n", | ||
" filename_elements.insert(2, var)\n", | ||
" cog_filename = \"_\".join(filename_elements)\n", | ||
" # # add extension\n", | ||
" cog_filename = f\"{cog_filename}.tif\"\n", | ||
"\n", | ||
" with tempfile.NamedTemporaryFile() as temp_file:\n", | ||
" data.rio.to_raster(\n", | ||
" temp_file.name,\n", | ||
" driver=\"COG\",\n", | ||
" )\n", | ||
" s3_client.upload_file(\n", | ||
" Filename=temp_file.name,\n", | ||
" Bucket=bucket_name,\n", | ||
" Key=f\"{FOLDER_NAME}/{cog_filename}\",\n", | ||
" )\n", | ||
"\n", | ||
" files_processed = files_processed._append(\n", | ||
" {\"file_name\": name, \"COGs_created\": cog_filename},\n", | ||
" ignore_index=True,\n", | ||
" )\n", | ||
"\n", | ||
" print(f\"Generated and saved COG: {cog_filename}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "14e06cc2-bf22-4703-b657-2d5b680045a4", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"with tempfile.NamedTemporaryFile(mode=\"w+\") as fp:\n", | ||
" json.dump(xds.attrs, fp)\n", | ||
" json.dump({\"data_dimensions\": dict(xds.dims)}, fp)\n", | ||
" json.dump({\"data_variables\": list(xds.data_vars)}, fp)\n", | ||
" fp.flush()\n", | ||
"\n", | ||
" s3_client.upload_file(\n", | ||
" Filename=fp.name,\n", | ||
" Bucket=bucket_name,\n", | ||
" Key=f\"{FOLDER_NAME}/metadata.json\",\n", | ||
" )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "b1af6cb5-6209-4f56-9695-b6c4bd9f33eb", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"files_processed.to_csv(\n", | ||
" f\"s3://{bucket_name}/{FOLDER_NAME}/files_converted.csv\",\n", | ||
")\n", | ||
"print(\"Done generating COGs\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |