-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix bug with GDAC data source and BGC dataset #418
Changes from all commits
4993592
f8c1a2d
5e9783e
fe65cad
b23932c
b16068d
d0b9c96
cdac3d1
b026a01
5130ac1
8420462
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,11 @@ | |
import copy | ||
|
||
from ..utils import to_list, list_core_parameters | ||
from ..utils.transform import split_data_mode, merge_param_with_param_adjusted, filter_param_by_data_mode | ||
from ..utils.transform import ( | ||
split_data_mode, | ||
merge_param_with_param_adjusted, | ||
filter_param_by_data_mode, | ||
) | ||
from ..stores import ( | ||
indexstore_pd as ArgoIndex, | ||
) # make sure we work with a Pandas index store | ||
|
@@ -43,10 +47,12 @@ class ParamsDataMode(ArgoAccessorExtension): | |
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
|
||
def compute(self, indexfs: Union[None, ArgoIndex]) -> xr.Dataset: # noqa: C901 | ||
"""Compute and add <PARAM>_DATA_MODE variables to a xarray dataset | ||
def _compute_from_ArgoIndex( | ||
self, indexfs: Union[None, ArgoIndex] | ||
) -> xr.Dataset: # noqa: C901 | ||
"""Compute <PARAM>_DATA_MODE variables from ArgoIndex | ||
|
||
This method consume a collection of points. | ||
This method consumes a collection of points. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -55,9 +61,9 @@ def compute(self, indexfs: Union[None, ArgoIndex]) -> xr.Dataset: # noqa: C901 | |
|
||
Returns | ||
------- | ||
:class:`xr.Dataset` | ||
:class:`xarray.Dataset` | ||
""" | ||
idx = copy.copy(indexfs) if isinstance(indexfs, ArgoIndex) else ArgoIndex() | ||
idx = indexfs.copy(deep=True) if isinstance(indexfs, ArgoIndex) else ArgoIndex() | ||
|
||
def complete_df(this_df, params): | ||
"""Add 'wmo', 'cyc' and '<param>_data_mode' columns to this dataframe""" | ||
|
@@ -103,6 +109,7 @@ def print_etime(txt, t0): | |
|
||
profiles = self._argo.list_WMO_CYC | ||
idx.search_wmo(self._argo.list_WMO) | ||
|
||
params = [ | ||
p | ||
for p in idx.read_params() | ||
|
@@ -168,10 +175,30 @@ def print_etime(txt, t0): | |
self._obj = self._obj[np.sort(self._obj.data_vars)] | ||
return self._obj | ||
|
||
def split(self): | ||
def compute(self, indexfs: Union[None, ArgoIndex]) -> xr.Dataset: | ||
"""Compute <PARAM>_DATA_MODE variables""" | ||
if "STATION_PARAMETERS" in self._obj and "PARAMETER_DATA_MODE" in self._obj: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did not spot this subtlety before |
||
return split_data_mode(self._obj) | ||
else: | ||
return self._compute_from_ArgoIndex(indexfs=indexfs) | ||
|
||
def split(self) -> xr.Dataset: | ||
"""Convert PARAMETER_DATA_MODE(N_PROF, N_PARAM) into several <PARAM>_DATA_MODE(N_PROF) variables | ||
|
||
Using the list of *PARAM* found in ``STATION_PARAMETERS``, this method will create ``N_PARAM`` | ||
new variables in the dataset ``<PARAM>_DATA_MODE(N_PROF)``. | ||
|
||
The variable ``PARAMETER_DATA_MODE`` is drop from the dataset at the end of the process. | ||
|
||
Returns | ||
------- | ||
:class:`xarray.Dataset` | ||
""" | ||
return split_data_mode(self._obj) | ||
|
||
def merge(self, params: Union[str, List[str]] = "all", errors: str = "raise") -> xr.Dataset: | ||
def merge( | ||
self, params: Union[str, List[str]] = "all", errors: str = "raise" | ||
) -> xr.Dataset: | ||
"""Merge <PARAM> and <PARAM>_ADJUSTED variables according to DATA_MODE or <PARAM>_DATA_MODE | ||
|
||
Merging is done as follows: | ||
|
@@ -251,7 +278,7 @@ def filter( | |
logical: str = "and", | ||
mask: bool = False, | ||
errors: str = "raise", | ||
): | ||
) -> xr.Dataset: | ||
"""Filter measurements according to parameters data mode | ||
|
||
Filter the dataset to keep points where all or some of the parameters are in any of the data mode specified. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -177,6 +177,8 @@ def __init__(self, mode: str = "", src: str = "", ds: str = "", **fetcher_kwargs | |
raise OptionValueError( | ||
"The 'argovis' data source fetching is only available in 'standard' user mode" | ||
) | ||
if self._src == "gdac" and "bgc" in self._dataset_id: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't remember why this was removed |
||
warnings.warn("BGC data support with the 'gdac' data source is still in Work In Progress") | ||
|
||
@property | ||
def _icon_user_mode(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
|
||
import numpy as np | ||
import xarray as xr | ||
import pandas as pd | ||
import logging | ||
from typing import List, Union | ||
|
||
|
@@ -340,6 +341,7 @@ def filter_param_by_data_mode( | |
return ds.loc[dict(N_POINTS=filter)] if len(filter) > 0 else ds | ||
|
||
|
||
|
||
def split_data_mode(ds: xr.Dataset) -> xr.Dataset: | ||
"""Convert PARAMETER_DATA_MODE(N_PROF, N_PARAM) into several <PARAM>_DATA_MODE(N_PROF) variables | ||
|
||
|
@@ -352,24 +354,48 @@ def split_data_mode(ds: xr.Dataset) -> xr.Dataset: | |
------- | ||
:class:`xr.Dataset` | ||
""" | ||
if ds.argo._type != "profile": | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Raise an error to avoid passing through for the wrong reasons and not detecting it |
||
raise InvalidDatasetStructure( | ||
"Method only available to a collection of profiles" | ||
) | ||
|
||
if "STATION_PARAMETERS" in ds and "PARAMETER_DATA_MODE" in ds: | ||
|
||
u64 = lambda s: "%s%s" % (s, " " * (64 - len(s))) # noqa: E731 | ||
params = [p.strip() for p in np.unique(ds["STATION_PARAMETERS"])] | ||
|
||
def read_data_mode_for(ds: xr.Dataset, param: str) -> xr.DataArray: | ||
"""Return data mode of a given parameter""" | ||
da_masked = ds['PARAMETER_DATA_MODE'].where(ds['STATION_PARAMETERS'] == u64(param)) | ||
|
||
def _dropna(x): | ||
# x('N_PARAM') is reduced to the first non nan value, a scalar, no dimension | ||
y = pd.Series(x).dropna().tolist() | ||
if len(y) == 0: | ||
return "" | ||
else: | ||
return y[0] | ||
|
||
kwargs = dict( | ||
dask="parallelized", | ||
input_core_dims=[["N_PARAM"]], # Function takes N_PARAM as input | ||
output_core_dims=[[]], # Function reduces to a scalar (no dimension) | ||
vectorize=True # Apply function element-wise along the other dimensions | ||
) | ||
|
||
dm = xr.apply_ufunc(_dropna, da_masked, **kwargs) | ||
dm = dm.rename("%s_DATA_MODE" % param) | ||
dm.attrs = ds['PARAMETER_DATA_MODE'].attrs | ||
return dm | ||
|
||
for param in params: | ||
name = "%s_DATA_MODE" % param.replace("_PARAMETER", "").replace( | ||
"PARAMETER_", "" | ||
) | ||
mask = ds["STATION_PARAMETERS"] == xr.full_like( | ||
ds["STATION_PARAMETERS"], | ||
u64(param), | ||
dtype=ds["STATION_PARAMETERS"].dtype, | ||
) | ||
da = ds["PARAMETER_DATA_MODE"].where(mask, drop=True).isel(N_PARAM=0) | ||
da = da.rename(name) | ||
da = da.astype(ds["PARAMETER_DATA_MODE"].dtype) | ||
ds[name] = da | ||
if name == "_DATA_MODE": | ||
log.error("This dataset has an error in 'STATION_PARAMETERS': it contains an empty string") | ||
else: | ||
ds[name] = read_data_mode_for(ds, param) | ||
|
||
ds = ds.drop_vars("PARAMETER_DATA_MODE") | ||
ds.argo.add_history("Transformed with 'split_data_mode'") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we have a new method `copy` for the ArgoIndex