Skip to content

Commit

Permalink
new nc file meta handling and force update on folderviews #72
Browse files Browse the repository at this point in the history
  • Loading branch information
FlatErikk committed Dec 19, 2024
1 parent dc8f150 commit 684d14f
Show file tree
Hide file tree
Showing 5 changed files with 392 additions and 115 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.5 on 2024-11-07 11:00

from django.db import migrations


class Migration(migrations.Migration):
    """Remove band_metadata, net_cdf_times, num_bands and timestamp_begin
    from the ``tempresultfile`` model of the ``climate`` app."""

    dependencies = [("climate", "0007_tempresultfile_timestamp_begin")]

    # One RemoveField operation per obsolete field, in the generated order.
    operations = [
        migrations.RemoveField(model_name="tempresultfile", name=obsolete_field)
        for obsolete_field in (
            "band_metadata",
            "net_cdf_times",
            "num_bands",
            "timestamp_begin",
        )
    ]
18 changes: 18 additions & 0 deletions framework/climate/migrations/0009_tempresultfile_nc_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.5 on 2024-11-07 11:01

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``nc_meta`` JSON field (default: empty dict) to the
    ``tempresultfile`` model of the ``climate`` app."""

    dependencies = [("climate", "0008_remove_tempresultfile_band_metadata_and_more")]

    operations = [
        # AddField(model_name, name, field)
        migrations.AddField(
            "tempresultfile",
            "nc_meta",
            field=models.JSONField(null=True, default=dict),
        ),
    ]
22 changes: 10 additions & 12 deletions framework/climate/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,15 @@ class TempResultFile(models.Model):
categorized_filename = models.CharField(max_length=500, unique=True, null=True)
filename = models.CharField(max_length=400, null=True)
category = models.CharField(max_length=255, choices=CATEGORIES, null=True)
num_bands = models.IntegerField(null=True)
timestamp_begin = models.CharField(max_length=500, null=True)
band_metadata = models.JSONField(default=dict)
net_cdf_times = models.JSONField(default=dict)

nc_meta = models.JSONField(null=True, default=dict)

# NOTE - deleted fields from last version (remove if new metadata works fine)
# num_bands = models.IntegerField(null=True)
# timestamp_begin = models.CharField(max_length=500, null=True)
# band_metadata = models.JSONField(default=dict)
# net_cdf_times = models.JSONField(default=dict)

st_mtime_nc = models.CharField(max_length=255, null=True)
st_mtime_tif = models.CharField(max_length=255, null=True)
st_size_nc = models.CharField(max_length=255, null=True)
Expand All @@ -291,14 +296,7 @@ def get_by_cat_filename(cat_filename: str):
return o

def get_file_metadata(self):
combined_metadata = {
'num_bands': self.num_bands,
'band_metadata': self.band_metadata,
'net_cdf_times': self.net_cdf_times,
'timestamp_begin': self.timestamp_begin
}

return combined_metadata
return self.nc_meta

def check_raw_version(self, version):
if str(version) != self.st_mtime_nc:
Expand Down
242 changes: 242 additions & 0 deletions framework/climate/ncmeta_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
import json
import os
from subprocess import (PIPE, Popen,)
from typing import Optional, TypedDict


# Typed shapes of the metadata that extract_ncfile_metadata() assembles from
# a gdalinfo JSON report. Entries the extractor fills with None when the
# report lacks them are declared Optional.

# Corner points copied from the report's "cornerCoordinates" object.
# The type name now matches the variable it is assigned to.
NCCornerCoordinates = TypedDict(
    "NCCornerCoordinates",
    {
        "upperLeft": list,
        "lowerLeft": list,
        "upperRight": list,
        "lowerRight": list,
        "center": list,
    },
)

# Copy of the report's "extent" (or "wgs84Extent") object.
NCExtent = TypedDict("NCExtent", {"type": str, "coordinates": list})

# Summary of a single band; "index" is the 1-based band number.
NCBand = TypedDict(
    "NCBand",
    {
        "index": int,
        "min": Optional[float],
        "max": Optional[float],
        "NETCDF_DIM_time": str,
    },
    total=False,
)

# Description of the variable, taken from the first band of the report.
NCVariable = TypedDict(
    "NCVariable",
    {
        "NETCDF_VARNAME": Optional[str],
        "standard_name": Optional[str],
        "long_name": Optional[str],
        "type": Optional[str],
        "unit": Optional[str],
    },
    total=False,
)

# Complete metadata record; "bands" is keyed by the band number as a string.
NCMetaData = TypedDict(
    "NCMetaData",
    {
        "size": Optional[list],
        "NETCDF_DIM_time_VALUES": list,
        "time#calendar": Optional[str],
        "time#units": Optional[str],
        "cornerCoordinates": Optional[NCCornerCoordinates],
        "extent": Optional[NCExtent],
        "num_bands": int,
        "bands": dict[str, NCBand],
        "varinfo": NCVariable,
    },
)


def read_raw_nc_meta_from_file(filepath: str):
    """Run ``gdalinfo <filepath> -json -mm`` and return the parsed JSON report.

    Args:
        filepath: Path passed to ``gdalinfo`` as the dataset to inspect.

    Returns:
        The object decoded from gdalinfo's JSON output, or ``False`` when
        gdalinfo could not be started or did not print valid JSON. The reason
        for a failure is printed, including gdalinfo's own error output.
    """
    try:
        # The command is passed as an argument list, so no shell is involved.
        process = Popen(["gdalinfo", filepath, "-json", "-mm"], stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
    except (OSError, ValueError, TypeError) as e:
        # e.g. the gdalinfo executable is missing or filepath is not a path.
        print(f"gdalinfo could not be run for {filepath!r}: {e}")
        return False

    try:
        return json.loads(stdout.decode("utf-8"))
    except ValueError as e:
        # ValueError covers undecodable bytes as well as empty/invalid JSON.
        # gdalinfo reports its own problems on stderr, which says more than
        # the JSON parser's message, so prefer it when present.
        reason = stderr.decode("utf-8", errors="replace").strip() or str(e)
        print(
            f"gdalinfo returned no usable JSON for {filepath!r} "
            f"(exit code {process.returncode}): {reason}"
        )
        return False


def read_file_specific_metadata(filepath: str):
    """Return modification time and size of the file at *filepath*.

    The values are also used for converted versions of the file (like tif):
    when a TempResultFile does not match the st_mtime of the file, the file
    is not up to date and should probably be deleted.

    Returns:
        ``{'st_mtime_nc': <st_mtime>, 'st_size_nc': <st_size>}`` taken from
        ``os.stat``, or ``False`` if the file could not be stat'ed.
    """
    try:
        stats = os.stat(filepath)
    except Exception:
        return False

    return {
        'st_mtime_nc': stats.st_mtime,
        'st_size_nc': stats.st_size,
    }


def _pick_keys(source, keys):
    """Return ``{key: source[key]}`` for all *keys*, or None if any lookup fails."""
    try:
        return {key: source[key] for key in keys}
    except (KeyError, TypeError, IndexError):
        return None


def _band_limit(band, primary_keys, fallback_key):
    """Return the first of *primary_keys* present in *band*, else the band
    metadata's *fallback_key* entry, else None."""
    for key in primary_keys:
        if key in band:
            return band[key]
    try:
        return band['metadata'][''].get(fallback_key)
    except (KeyError, TypeError, AttributeError):
        return None


def _extract_bands(bands_meta):
    """Summarise every band, keyed by its 1-based number as a string.

    Raises KeyError or TypeError when a band has no ``NETCDF_DIM_time`` entry
    in its metadata or is not shaped like a gdalinfo band object.
    """
    extracted = {}
    for index, band in enumerate(bands_meta, start=1):
        band_collect: NCBand = {
            'min': _band_limit(band, ('computedMin', 'min'), 'valid_min'),
            # max now falls back to valid_max, mirroring the min handling
            'max': _band_limit(band, ('computedMax', 'max'), 'valid_max'),
            'NETCDF_DIM_time': band['metadata']['']['NETCDF_DIM_time'],
            'index': index,
        }
        extracted[str(index)] = band_collect
    return extracted


def _extract_varinfo(first_band):
    """Describe the variable using the first band; missing entries become None."""
    try:
        lookup = first_band['metadata'][''].get
    except (KeyError, TypeError, AttributeError):
        lookup = {}.get

    varinfo: NCVariable = {
        'type': first_band.get('type'),
        'NETCDF_VARNAME': lookup('NETCDF_VARNAME'),
        'standard_name': lookup('standard_name'),
        'long_name': lookup('long_name'),
        # the report calls it "units", the stored record "unit"
        'unit': lookup('units'),
    }
    return varinfo


def extract_ncfile_metadata(filepath: str):
    """Condense the gdalinfo report of a file into an ``NCMetaData`` record.

    The raw report is obtained from ``read_raw_nc_meta_from_file``.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        A ``(success, payload)`` tuple. On success ``(True, metadata)`` where
        *metadata* is the ``NCMetaData`` dict. On failure ``(False, message)``;
        *message* is ``""`` when the report is missing, malformed, has no
        bands or no ``NETCDF_DIM_time_VALUES``, and
        ``"missing metadata key,value pairs"`` when a band cannot be read.
    """
    raw_meta = read_raw_nc_meta_from_file(filepath)
    if not raw_meta or not isinstance(raw_meta, dict):
        return False, ""

    # obvious checks, if these keys are missing, extraction fails
    top_meta = raw_meta.get('metadata')
    if not isinstance(top_meta, dict) or '' not in top_meta:
        return False, ""

    # An empty band list must fail here: the variable info below is read
    # from the first band. (The former check ``len(...) < 0`` never fired.)
    bands_meta = raw_meta.get('bands')
    if not isinstance(bands_meta, list) or not bands_meta:
        return False, ""

    # second level metadata
    sub_meta = top_meta['']
    if not isinstance(sub_meta, dict):
        return False, ""

    # NOTE - this is a full failure, because time values are always assumed
    try:
        raw_time_values = sub_meta['NETCDF_DIM_time_VALUES']
        # "{a, b, c}" -> ["a", "b", "c"]
        nc_netcdf_times = (
            raw_time_values.replace("{", "").replace("}", "").replace(" ", "").split(",")
        )
    except (KeyError, AttributeError, TypeError):
        return False, ""

    nc_cornerCoordinates = _pick_keys(
        raw_meta.get('cornerCoordinates'),
        ('upperLeft', 'upperRight', 'lowerLeft', 'lowerRight', 'center'),
    )

    # "extent" wins over "wgs84Extent"; a malformed entry yields None
    nc_extent = None
    for extent_key in ('extent', 'wgs84Extent'):
        if extent_key in raw_meta:
            nc_extent = _pick_keys(raw_meta[extent_key], ('type', 'coordinates'))
            break

    try:
        nc_extracted_bands_meta = _extract_bands(bands_meta)
    except (KeyError, TypeError, IndexError, AttributeError):
        return False, "missing metadata key,value pairs"

    full_nc_meta: NCMetaData = {
        'size': raw_meta.get('size'),
        'cornerCoordinates': nc_cornerCoordinates,
        'extent': nc_extent,
        'NETCDF_DIM_time_VALUES': nc_netcdf_times,
        'time#calendar': sub_meta.get('time#calendar'),
        'time#units': sub_meta.get('time#units'),
        'bands': nc_extracted_bands_meta,
        'varinfo': _extract_varinfo(bands_meta[0]),
        'num_bands': len(nc_extracted_bands_meta),
    }

    return True, full_nc_meta
Loading

0 comments on commit 684d14f

Please sign in to comment.