diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index 797d73f..6beca9e 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -67,7 +67,7 @@ "collection_json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", "filtered_collection_file_paths_list = [\n", " item\n", - " for item in json_file_paths\n", + " for item in collection_json_file_paths\n", " if all(\n", " excluded_collections not in item\n", " for excluded_collections in excluded_collections\n", @@ -169,14 +169,8 @@ "outputs": [], "source": [ "test_endpoint = \"https://test.openveda.cloud\"\n", - "test_client_id = \"CHANGE ME\"\n", - "test_user_pool_id = \"CHANGE ME\"\n", - "test_identity_pool_id = \"CHANGE ME\"\n", "\n", "mcp_prod_endpoint = \"https://openveda.cloud\"\n", - "mcp_prod_client_id = \"CHANGE ME\"\n", - "mcp_prod_user_pool_id = \"CHANGE ME\"\n", - "mcp_prod_identity_pool_id = \"CHANGE ME\"\n", "\n", "print(f\"TESTING MODE? 
{testing_mode}\")\n", "if testing_mode:\n", @@ -307,7 +301,8 @@ "outputs": [], "source": [ "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n", - "test_discovery_item = [f\"../ingestion-data/production/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", + "test_discovery_json_path = f\"../ingestion-data/production/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"\n", + "test_discovery_item = [test_discovery_json_path]\n", "\n", "print(test_discovery_item)\n", "print(test_file_paths_and_collection_ids)\n", diff --git a/transformation-scripts/special-collection-and-item-workflows-ingest.ipynb b/transformation-scripts/special-collection-and-item-workflows-ingest.ipynb new file mode 100644 index 0000000..bf53546 --- /dev/null +++ b/transformation-scripts/special-collection-and-item-workflows-ingest.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook to Publish Special Collections and Start Discovery Workflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook publishes the following collections in `/ingestion-data/collections`:\n", + "- 'hls-l30-002-ej-reprocessed'\n", + "- 'hls-s30-002-ej-reprocessed'\n", + "- 'ls8-covid-19-example-data'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-pine-island'\n", + "- 'landsat-c2l2-sr-lakes-aral-sea'\n", + "- 'landsat-c2l2-sr-lakes-tonle-sap'\n", + "- 'landsat-c2l2-sr-lakes-lake-balaton'\n", + "- 'landsat-c2l2-sr-lakes-vanern'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-thwaites'\n", + "- 'landsat-c2l2-sr-lakes-lake-biwa'\n", + "- 'combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO'\n", + "- 'nceo_africa_2017'" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import json\n", + "import requests\n", + "\n", + "from cognito_client import CognitoClient" + ] + }, + { + 
# %% [markdown]
# The following cell builds the paths of the special collections' JSON files in
# `/ingestion-data/collections/` and saves each file path, together with the
# collection ID declared in that file, to a list.

# %%
special_collections = [
    "hls-l30-002-ej-reprocessed",
    "hls-s30-002-ej-reprocessed",
    "ls8-covid-19-example-data",
    "landsat-c2l2-sr-antarctic-glaciers-pine-island",
    "landsat-c2l2-sr-lakes-aral-sea",
    "landsat-c2l2-sr-lakes-tonle-sap",
    "landsat-c2l2-sr-lakes-lake-balaton",
    "landsat-c2l2-sr-lakes-vanern",
    "landsat-c2l2-sr-antarctic-glaciers-thwaites",
    "landsat-c2l2-sr-lakes-lake-biwa",
    "combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO",
    "nceo_africa_2017",
]

collection_file_paths = [
    f"../ingestion-data/collections/{collection}.json"
    for collection in special_collections
]
print(collection_file_paths)

# Pair every collection file with the `id` stored inside it; files whose JSON
# has no `id` key are skipped. Each file is read inside a `with` block so its
# handle is closed as soon as the JSON has been parsed (the previous
# `json.load(open(...))` form left every handle open).
file_paths_and_collection_ids = []
for file_path in collection_file_paths:
    with open(file_path, "r") as collection_file:
        data = json.load(collection_file)
    if "id" in data:
        file_paths_and_collection_ids.append(
            {"filePath": file_path, "collectionId": data["id"]}
        )

# %% [markdown]
# Set the testing mode to `True` when testing and `False` otherwise. When the
# testing mode is `True`, the notebook runs against the test endpoint
# (`https://test.openveda.cloud`) instead of the production endpoint.

# %%
testing_mode = True

# %% [markdown]
# The following cell compares the collection files listed above with the files
# in `ingestion-data/production/discovery-items` and returns a list of all the
# discovery-items files that have a corresponding collection.
# %%
# Discovery-items files, seen while matching, whose items point at a bucket
# other than `veda-data-store`. Each path is recorded at most once.
items_in_external_buckets = []


def load_json_file(file_path):
    """Parse the JSON document stored at `file_path` and return it."""
    with open(file_path, "r") as file:
        return json.load(file)


def _names_external_bucket(item):
    """Tell whether `item` declares a `bucket` other than `veda-data-store`."""
    return "bucket" in item and item.get("bucket") != "veda-data-store"


def _inspect_discovery_items(item_json):
    """Return `(collection, in_external_bucket)` for a parsed discovery-items file.

    The file holds either one discovery item (a dict) or a list of them. For a
    list only the first entry is inspected, and an empty list yields
    `(None, False)`.
    """
    if isinstance(item_json, list):
        if not item_json:
            return None, False
        first_item = item_json[0]
        # The bucket is declared on the list's entries, not on the list itself,
        # so the membership test has to look at the entry.
        return first_item.get("collection"), _names_external_bucket(first_item)

    collection = item_json.get("collection")
    # A single item is only checked for its bucket when it names a collection.
    return collection, collection is not None and _names_external_bucket(item_json)


def find_matching_file_names(collections_list, discovery_items_list):
    """Return the discovery-items files that belong to the given collections.

    For every collection file, in order, the discovery-items files are scanned
    in order and the first one whose `collection` equals the collection's `id`
    is kept; the scan for that collection stops there. Collection files
    without an `id` are skipped.

    Side effect: every scanned discovery-items file that points at a bucket
    other than `veda-data-store` is appended (once) to the module-level
    `items_in_external_buckets` list.

    Args:
        collections_list: paths of collection JSON files.
        discovery_items_list: paths of discovery-items JSON files.

    Returns:
        A list with one discovery-items path per matched collection.
    """
    parsed_discovery_items = {}  # path -> parsed JSON, so each file is read once
    matching_file_names = []

    for collection_filename in collections_list:
        collection_id = load_json_file(collection_filename).get("id")
        if collection_id is None:
            continue

        for discovery_items_filename in discovery_items_list:
            if discovery_items_filename not in parsed_discovery_items:
                parsed_discovery_items[discovery_items_filename] = load_json_file(
                    discovery_items_filename
                )
            item_collection, in_external_bucket = _inspect_discovery_items(
                parsed_discovery_items[discovery_items_filename]
            )

            if (
                in_external_bucket
                and discovery_items_filename not in items_in_external_buckets
            ):
                items_in_external_buckets.append(discovery_items_filename)

            # `item_collection` is None for empty or collection-less files, and
            # `collection_id` is never None here, so those can never match.
            if item_collection == collection_id:
                # Found a match
                matching_file_names.append(discovery_items_filename)
                break

    return matching_file_names


# %%
discovery_items_json_file_paths = glob.glob(
    "../ingestion-data/production/discovery-items//*.json"
)

# Find matching file names
matching_file_names = find_matching_file_names(
    collection_file_paths, discovery_items_json_file_paths
)

special_items_to_process = matching_file_names
print(special_items_to_process)
print(items_in_external_buckets)

# `testing_mode` is set once, in the cell that documents it earlier in the
# notebook. The second `testing_mode = True` cell that used to sit here was
# removed because it silently overrode that documented switch.

# %% [markdown]
# Have your Cognito `username` and `password` ready to set up the Cognito
# client and retrieve a token that will be used to access the STAC Ingestor API.
# %%
# Endpoints and Cognito settings per deployment. Every "CHANGE ME" placeholder
# must be replaced with a real value before this cell is run.
test_endpoint = "https://test.openveda.cloud"
test_client_id = "CHANGE ME"
test_user_pool_id = "CHANGE ME"
test_identity_pool_id = "CHANGE ME"

mcp_prod_endpoint = "https://openveda.cloud"
mcp_prod_client_id = "CHANGE ME"
mcp_prod_user_pool_id = "CHANGE ME"
mcp_prod_identity_pool_id = "CHANGE ME"

staging_endpoint = "https://staging-stac.delta-backend.com/"
staging_client_id = "CHANGE ME"
staging_user_pool_id = "CHANGE ME"
staging_identity_pool_id = "CHANGE ME"

# `testing_mode` selects the deployment that collections and items are sent to.
target_endpoint = test_endpoint if testing_mode else mcp_prod_endpoint
STAC_INGESTOR_API = f"{target_endpoint}/api/ingest/"
VEDA_STAC_API = f"{target_endpoint}/api/stac/"
WORKFLOWS_API = (
    "https://4hrks0hk0b.execute-api.us-west-2.amazonaws.com/"
    if testing_mode
    else "https://bct2n8in53.execute-api.us-west-2.amazonaws.com/"
)

# NOTE(review): the login below always uses the `staging_*` Cognito settings,
# even though the URLs above follow `testing_mode` and the `test_*` /
# `mcp_prod_*` settings are never read in this cell — confirm this is intended.
cognito_settings = {
    "client_id": staging_client_id,
    "user_pool_id": staging_user_pool_id,
    "identity_pool_id": staging_identity_pool_id,
}
client = CognitoClient(**cognito_settings)
_ = client.login()

# %% [markdown]
# The following cell sets up headers for requests.

# %%
TOKEN = client.access_token
authorization_header = f"Bearer {TOKEN}"
headers = dict(
    [
        ("Authorization", authorization_header),
        ("content-type", "application/json"),
        ("accept", "application/json"),
    ]
)

# %% [markdown]
# The following cells define the helpers that fetch an item from the staging
# STAC API and clean it up, and the functions that post collections and items.
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "JSON content: {'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [{'rel': 'collection', 'type': 'application/json', 'href': 'https://staging-stac.delta-backend.com/collections/nceo_africa_2017'}, {'rel': 'parent', 'type': 'application/json', 'href': 'https://staging-stac.delta-backend.com/collections/nceo_africa_2017'}, {'rel': 'root', 'type': 'application/json', 'href': 'https://staging-stac.delta-backend.com/'}, {'rel': 'self', 'type': 'application/geo+json', 'href': 'https://staging-stac.delta-backend.com/collections/nceo_africa_2017/items/AGB_map_2017v0m_COG'}, {'title': 'Map of Item', 'href': 'https://3hwvk17uek.execute-api.us-west-2.amazonaws.com/stac/map?collection=nceo_africa_2017&item=AGB_map_2017v0m_COG&assets=cog_default&rescale=0%2C400&colormap_name=gist_earth_r', 'rel': 'preview', 'type': 'text/html'}], 'assets': {'cog_default': {'href': 's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}, 'rendered_preview': {'title': 'Rendered preview', 'href': 
'https://3hwvk17uek.execute-api.us-west-2.amazonaws.com/stac/preview.png?collection=nceo_africa_2017&item=AGB_map_2017v0m_COG&assets=cog_default&rescale=0%2C400&colormap_name=gist_earth_r', 'rel': 'preview', 'roles': ['overview'], 'type': 'image/png'}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n", + "ITEM {'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [], 'assets': {'cog_default': {'href': 's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 
429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}, 'rendered_preview': {'title': 'Rendered preview', 'href': 'https://3hwvk17uek.execute-api.us-west-2.amazonaws.com/stac/preview.png?collection=nceo_africa_2017&item=AGB_map_2017v0m_COG&assets=cog_default&rescale=0%2C400&colormap_name=gist_earth_r', 'rel': 'preview', 'roles': ['overview'], 'type': 'image/png'}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n", + "FINAL {'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [], 'assets': {'cog_default': {'href': 
's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n", + "{'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [], 'assets': 
# %%
def remove_links(item):
    """Replace the item's `links` with an empty list and return the item.

    The item is modified in place; the updated item is also printed.
    """
    item["links"] = []
    print(f"ITEM {item}")
    return item


def remove_rendered_preview(item):
    """Drop the `rendered_preview` asset from the item, if it has one.

    Items without that asset (or without any `assets`) are returned unchanged
    instead of raising `KeyError`. The item is modified in place and returned.
    """
    assets = item.get("assets")
    if assets:
        assets.pop("rendered_preview", None)
    return item


# def add_null_datetime(item):
#     item["properties"]["datetime"] = None


def get_item_to_ingest(collection_id):
    """Return the `self` link of the first item listed for a staging collection.

    Requests the collection's `items` listing from `staging_endpoint` (sending
    the module-level `headers`) and scans the returned features in order.

    Args:
        collection_id: ID of the collection whose items are listed.

    Returns:
        The `href` of the first non-empty `self` link found, or None when the
        response lists no feature with such a link.

    Raises:
        requests.HTTPError: when the listing request returns an error status.
    """
    # `staging_endpoint` may end with "/"; strip it so the URL has no "//".
    base_url = staging_endpoint.rstrip("/")
    url = f"{base_url}/collections/{collection_id}/items"
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # A response without `features` is treated like an empty listing.
    features = response.json().get("features") or []
    for feature in features:
        for link in feature.get("links", []):
            if link.get("rel") == "self":
                href = link.get("href")
                if href:
                    return href
                break  # only the first `self` link of a feature is considered
    return None


def modify_item_before_ingest(item_href):
    """Download the item at `item_href` and prepare it for ingestion.

    The item's links are emptied and its `rendered_preview` asset is removed.
    The raw and the cleaned item are printed.

    Args:
        item_href: URL of the STAC item, e.g. as returned by
            `get_item_to_ingest` (which may return None).

    Returns:
        The cleaned item as a dict, or None when no href was given or the
        request failed (the error is printed).
    """
    if not item_href:
        print("Error fetching JSON content: no item href was provided")
        return None

    try:
        response = requests.get(item_href)
        response.raise_for_status()  # Raise an exception for HTTP errors
        json_content = response.json()  # Parse JSON response
        print("JSON content:", json_content)
        json_content = remove_links(json_content)
        json_content = remove_rendered_preview(json_content)
        # json_content = add_null_datetime(json_content)
        print(f"FINAL {json_content}")
        return json_content
    except requests.exceptions.RequestException as e:
        print("Error fetching JSON content:", e)
        return None


# %%
# TESTING THINGS
item_to_ingest = get_item_to_ingest("nceo_africa_2017")
finalized_item = modify_item_before_ingest(item_to_ingest)
print(finalized_item)
"output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 50\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError fetching JSON content:\u001b[39m\u001b[38;5;124m\"\u001b[39m, e)\n\u001b[0;32m---> 50\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mget_item_to_ingest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnceo_africa_2017\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28mprint\u001b[39m(result)\n\u001b[1;32m 53\u001b[0m failed_ingest_items \u001b[38;5;241m=\u001b[39m []\n", + "Cell \u001b[0;32mIn[6], line 31\u001b[0m, in \u001b[0;36mget_item_to_ingest\u001b[0;34m(collection_id)\u001b[0m\n\u001b[1;32m 29\u001b[0m json_response \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n\u001b[1;32m 30\u001b[0m features \u001b[38;5;241m=\u001b[39m json_response\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeatures\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m feature \u001b[38;5;129;01min\u001b[39;00m \u001b[43mdata\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeatures\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# Iterate through links\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m link \u001b[38;5;129;01min\u001b[39;00m feature[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlinks\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 34\u001b[0m 
# %%
def post_collection(collection, collection_id):
    """Publish one collection document to the STAC ingestor's `collections` route.

    Posts `collection` as JSON with the module-level `headers` and prints the
    outcome: a success line for a 201 response, an error line otherwise.
    Request errors and any other exception are caught and printed; nothing is
    raised and nothing is returned.

    Args:
        collection: the parsed collection JSON to publish.
        collection_id: the collection's ID, used in messages and in the URL
            printed on success.
    """
    published_url = f"{VEDA_STAC_API}collections/{collection_id}"
    target_url = f"{STAC_INGESTOR_API}collections"

    try:
        reply = requests.post(target_url, json=collection, headers=headers)
        reply.raise_for_status()
        if reply.status_code != 201:
            print(
                f"ERROR: Updating {collection_id} failed. Request failed with status code: {reply.status_code}"
            )
            return
        print(
            f"Request was successful. Find the updated collection at {published_url}"
        )
    except requests.RequestException as error:
        print(
            f"ERROR: Updating {collection_id} failed. An error occurred during the request: {error}"
        )
    except Exception as error:
        print(
            f"ERROR: An unexpected error occurred while trying to update {collection_id}: {error}"
        )


# Paths of discovery-items files with failed ingests; a path is appended once
# for every item of that file that could not be ingested.
failed_ingest_items = []


def ingest_external_item(external_item, external_item_path):
    """Post one discovery item to the STAC ingestor's `ingestion` route.

    Prints the target URL, then the outcome. Any outcome other than a 201
    response (another status code, a request error, or any other exception)
    prints an error line and appends `external_item_path` to the module-level
    `failed_ingest_items` list. Nothing is raised and nothing is returned.

    Args:
        external_item: the parsed discovery item to post as JSON.
        external_item_path: path of the file the item came from.
    """
    target_url = f"{STAC_INGESTOR_API}ingestion"
    print(target_url)

    try:
        reply = requests.post(target_url, json=external_item, headers=headers)
        reply.raise_for_status()
        if reply.status_code == 201:
            print(f"Request was successful. {reply}")
            return
        failure_message = f"ERROR: Ingesting item for {external_item} failed. Request failed with status code: {reply.status_code}"
    except requests.RequestException as error:
        failure_message = f"ERROR: Ingesting item for {external_item} failed. An error occurred during the request: {error}"
    except Exception as error:
        failure_message = f"ERROR: An unexpected error occurred while trying to ingest item for {external_item} failed: {error}"

    # Every path that reaches this point is a failure.
    print(failure_message)
    failed_ingest_items.append(external_item_path)


# %% [markdown]
# The following cell publishes the collection to the target ingestion `api/collections` endpoint.

# %%
for entry in file_paths_and_collection_ids:
    collection_id, file_path = entry["collectionId"], entry["filePath"]

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            collection = json.load(file)

        # Publish the updated collection to the target ingestion `api/collections` endpoint
        post_collection(collection, collection_id)

    except requests.RequestException as e:
        print(f"An error occurred for collectionId {collection_id}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred for collectionId {collection_id}: {e}")

# %% [markdown]
# The following cell ingests the collection items:

# %%
for special_item_path in special_items_to_process:
    try:
        with open(special_item_path, "r", encoding="utf-8") as file:
            discovery_item_json = json.load(file)
        print(discovery_item_json)

        # A discovery-items file holds either one item or a list of items.
        items_to_ingest = (
            discovery_item_json
            if isinstance(discovery_item_json, list)
            else [discovery_item_json]
        )
        for single_discovery_item_json in items_to_ingest:
            ingest_external_item(single_discovery_item_json, special_item_path)

    except requests.RequestException as e:
        print(f"An error occurred for item {special_item_path}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred for item {special_item_path}: {e}")

# %%
print(failed_ingest_items)

# Notebook metadata: kernelspec "venv" (name "python3"), Python 3.11.7,
# nbformat 4, nbformat_minor 2.