Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement relative altitude filter #42

Merged
merged 35 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
0f2427b
Boilerplate
thorbjoernl Sep 4, 2024
461f1fc
Progress on implementing relative altitude filter
thorbjoernl Sep 4, 2024
da22b65
fix dependencies
thorbjoernl Sep 4, 2024
0c02e39
Add tests and test topography data
thorbjoernl Sep 5, 2024
41efda6
.
thorbjoernl Sep 5, 2024
8d1379f
.
thorbjoernl Sep 5, 2024
155cf38
CR1
thorbjoernl Sep 5, 2024
45fc98e
CR2
thorbjoernl Sep 5, 2024
cdef4c1
Remove netcdf as explicit dependency
thorbjoernl Sep 5, 2024
3cc7d6e
WIP properly read coords and determine type
thorbjoernl Sep 5, 2024
b77ab63
.
thorbjoernl Sep 5, 2024
c55c3ad
.
thorbjoernl Sep 5, 2024
72cc4a3
.
thorbjoernl Sep 5, 2024
926a6f0
Add cfunits as dependency
thorbjoernl Sep 5, 2024
d1627da
Install libudunits2-dev in ci
thorbjoernl Sep 5, 2024
3b45cb8
.
thorbjoernl Sep 5, 2024
83ca967
.
thorbjoernl Sep 5, 2024
dc3c47f
.
thorbjoernl Sep 5, 2024
9a459d9
Add second test data file
thorbjoernl Sep 5, 2024
709e9bf
.
thorbjoernl Sep 5, 2024
405d62c
Tests green locally
thorbjoernl Sep 5, 2024
fb86b1e
Numpy 2 breaks fix
thorbjoernl Sep 5, 2024
da9b97f
Remove code comments + model->gridded
thorbjoernl Sep 5, 2024
102a5c5
.
thorbjoernl Sep 5, 2024
0e7a6f2
.
thorbjoernl Sep 5, 2024
b79c5c2
Make dependencies optional
thorbjoernl Sep 6, 2024
6717392
Oopsie
thorbjoernl Sep 6, 2024
e557496
.
thorbjoernl Sep 6, 2024
f79683c
.
thorbjoernl Sep 6, 2024
0b5d8ed
.
thorbjoernl Sep 6, 2024
cd1dfc9
...
thorbjoernl Sep 6, 2024
b3742f4
Exclude stations outside grid bounding bocks
thorbjoernl Sep 6, 2024
600865a
Add benchmark
thorbjoernl Sep 6, 2024
82df240
Optimized code: benchmark from 27.133s->0.067s
thorbjoernl Sep 6, 2024
a69a48d
Preallocate numpy arrays
thorbjoernl Sep 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions scripts/make_topography_subset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import xarray as xr

# Script which extracts only the first time point and topography value from the emep topography
# file to reduce data for the tests.
data = xr.open_dataset("/lustre/storeB/project/fou/kl/emep/Auxiliary/topography.nc")

start_time = data["time"][0]

data = data.sel(time=slice(start_time))

data = data["topography"]


data.to_netcdf("tests/testdata/datadir_elevation/topography.nc")
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ python_version = >=3.10
install_requires =
numpy >= 1.13
importlib-metadata >= 3.6; python_version < "3.10"
xarray
netcdf4
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved
package_dir =
=src
packages = pyaro, pyaro.timeseries, pyaro.csvreader
Expand Down
2 changes: 1 addition & 1 deletion src/pyaro/timeseries/AutoFilterReaderEngine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def supported_filters(cls) -> list[Filter]:

:return: list of filters
"""
supported = "variables,stations,countries,bounding_boxes,duplicates,time_bounds,time_resolution,flags,time_variable_station,altitude".split(
supported = "variables,stations,countries,bounding_boxes,duplicates,time_bounds,time_resolution,flags,time_variable_station,altitude,relaltitude".split(
","
)
return [filters.get(name) for name in supported]
Expand Down
84 changes: 83 additions & 1 deletion src/pyaro/timeseries/Filter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import math
import abc
from collections import defaultdict
Expand All @@ -9,10 +10,12 @@
from typing import Any

import numpy as np
import xarray as xr

from .Data import Data, Flag
from .Station import Station

logger = logging.getLogger(__name__)

class Filter(abc.ABC):
"""Base-class for all filters used from pyaro-Readers"""
Expand Down Expand Up @@ -855,4 +858,83 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]:
if self._max_altitude is not None:
stations = {n: s for n, s in stations.items() if (not math.isnan(s["altitude"]) and s["altitude"] <= self._max_altitude) }

return stations
return stations

@registered_filter
class RelativeAltitudeFilter(StationFilter):
"""
Filter class which filters stations based on the relative difference between
the station altitude, and the model topography altitude.

thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved
https://github.com/metno/pyaro/issues/39
"""
def __init__(self, topo_file: str | None = None, topo_var: str = "topography", rtol: float = 1):
"""
:param topo_file : A .nc file from which to read model topography data.
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved
:param topo_var : Name of variable that stores altitude.
:param rtol : Relative toleranse.
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved

Note:
-----
Stations will be kept if abs(altobs-altmod) <= rtol*abs(altobs)
"""
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved
self._topo_file = topo_file
self._topo_var = topo_var
self._rtol = rtol

self._topography = None
if topo_file is not None:
self._topography = xr.open_dataset(topo_file)
else:
logger.warning("No topography data provided (topo_file='%s'). Relative elevation filtering will not be applied.", topo_file)

def _model_altitude_from_lat_lon(self, lat: float, lon: float) -> float:
# TODO: Include a tolerance?
data = self._topography.sel({"lat": lat, "lon": lon}, method="nearest")
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved

# Should not vary in time too much so picking the first one here.
altitude = data["topography"][0]
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved

return float(altitude)

def _is_close(self, altmod: float, altobs: float) -> bool:
"""
Function to check if two altitudes are within a relative tolerance of each
other.

:param altmod : Model altitude (in meters).
:param altobs : Observation / station altitude (in meters).

:returns :
True if the values are close with station altitude as the reference
value.
"""
return abs(altmod-altobs) <= (self._rtol * abs(altobs))
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved

def init_kwargs(self):
return {
"topo_file": self._file,
"topo_var": self._topo_var,
"rtol": self._rtol
}

def name(self):
return "relaltitude"

def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]:
if self._topography is None:
return stations

new_stations = dict()

for name, station in stations.items():
lat = station["latitude"]
lon = station["longitude"]

altobs = station["altitude"]
altmod = self._model_altitude_from_lat_lon(lat, lon)

if self._is_close(altmod, altobs):
new_stations[name] = station

return new_stations
76 changes: 76 additions & 0 deletions tests/test_CSVTimeSeriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,82 @@ def test_altitude_filter_3(self):
}
) as ts:
self.assertEqual(len(ts.stations()), 1)

def test_relaltitude_filter_1(self):
engines = pyaro.list_timeseries_engines()
with engines["csv_timeseries"].open(
filename=self.elevation_file,
filters=[pyaro.timeseries.filters.get("relaltitude", topo_file = "./tests/testdata/datadir_elevation/topography.nc", rtol=0)],
columns={
"variable": 0,
"station": 1,
"longitude": 2,
"latitude": 3,
"value": 4,
"units": 5,
"start_time": 6,
"end_time": 7,
"altitude": 9,
"country": "NO",
"standard_deviation": "NaN",
"flag": "0",
}
) as ts:
# Altitudes in dataset:
# Station | Alt_obs | Modeobs |
# Station 1 | 100 | 12.2554 |
# Station 2 | 200 | 4.9016 |
# Station 3 | 300 | 4.9016 |
# Since rtol = 0, no station should be included.
self.assertEqual(len(ts.stations()), 0)

def test_relaltitude_filter_2(self):
engines = pyaro.list_timeseries_engines()
with engines["csv_timeseries"].open(
filename=self.elevation_file,
filters=[pyaro.timeseries.filters.get("relaltitude", topo_file = "./tests/testdata/datadir_elevation/topography.nc", rtol=0.89)],
columns={
"variable": 0,
"station": 1,
"longitude": 2,
"latitude": 3,
"value": 4,
"units": 5,
"start_time": 6,
"end_time": 7,
"altitude": 9,
"country": "NO",
"standard_deviation": "NaN",
"flag": "0",
}
) as ts:
# At rtol = 0.89, only the first station should be included.
self.assertEqual(len(ts.stations()), 1)

def test_relaltitude_filter_3(self):
engines = pyaro.list_timeseries_engines()
with engines["csv_timeseries"].open(
filename=self.elevation_file,
filters=[pyaro.timeseries.filters.get("relaltitude", topo_file = "./tests/testdata/datadir_elevation/topography.nc", rtol=1)],
columns={
"variable": 0,
"station": 1,
"longitude": 2,
"latitude": 3,
"value": 4,
"units": 5,
"start_time": 6,
"end_time": 7,
"altitude": 9,
"country": "NO",
"standard_deviation": "NaN",
"flag": "0",
}
) as ts:
# Since rtol=1, all stations should be included.
self.assertEqual(len(ts.stations()), 3)




if __name__ == "__main__":
Expand Down
Binary file added tests/testdata/datadir_elevation/topography.nc
Binary file not shown.