Skip to content

Commit

Permalink
Add reader_kwargs argument to open_virtual_dataset (#315)
Browse files Browse the repository at this point in the history
* add reader_kwargs argument to open_virtual_dataset, and pass it down to every reader

* rename reader_kwargs -> virtual_backend_kwargs

* release note
  • Loading branch information
TomNicholas authored Nov 27, 2024
1 parent 3d7a4be commit 5152624
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 0 deletions.
3 changes: 3 additions & 0 deletions docs/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ v1.1.1 (unreleased)
New Features
~~~~~~~~~~~~

- Add a ``virtual_backend_kwargs`` keyword argument to file readers and to ``open_virtual_dataset``, to allow reader-specific options to be passed down.
(:pull:`315`) By `Tom Nicholas <https://github.com/TomNicholas>`_.

Breaking changes
~~~~~~~~~~~~~~~~

Expand Down
4 changes: 4 additions & 0 deletions virtualizarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def open_virtual_dataset(
cftime_variables: Iterable[str] | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_array_class=ManifestArray,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
backend: Optional[VirtualBackend] = None,
) -> Dataset:
Expand Down Expand Up @@ -147,6 +148,8 @@ def open_virtual_dataset(
virtual_array_class
Virtual array class to use to represent the references to the chunks in each on-disk array.
Currently can only be ManifestArray, but once VirtualZarrArray is implemented the default should be changed to that.
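virtual_backend_kwargs: dict, default is None
Dictionary of keyword arguments passed down to the reader selected for this file. Allows passing arguments specific to certain readers. Readers that do not accept any such arguments raise NotImplementedError if this is non-empty.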
reader_options: dict, default {}
Dict passed into Kerchunk file readers, to allow reading from remote filesystems.
Note: Each Kerchunk file reader has distinct arguments, so ensure reader_options match selected Kerchunk reader arguments.
Expand Down Expand Up @@ -201,6 +204,7 @@ def open_virtual_dataset(
loadable_variables=loadable_variables,
decode_times=decode_times,
indexes=indexes,
virtual_backend_kwargs=virtual_backend_kwargs,
reader_options=reader_options,
)

Expand Down
2 changes: 2 additions & 0 deletions virtualizarr/readers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
raise NotImplementedError()
Expand All @@ -180,6 +181,7 @@ def open_virtual_datatree(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> DataTree:
raise NotImplementedError()
6 changes: 6 additions & 0 deletions virtualizarr/readers/dmrpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,19 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
loadable_variables, drop_variables = check_for_collisions(
drop_variables=drop_variables,
loadable_variables=loadable_variables,
)

if virtual_backend_kwargs:
raise NotImplementedError(
"DMR++ reader does not understand any virtual_backend_kwargs"
)

if loadable_variables != [] or decode_times or indexes is None:
raise NotImplementedError(
"Specifying `loadable_variables` or auto-creating indexes with `indexes=None` is not supported for dmrpp files."
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
from kerchunk.fits import process_file

if virtual_backend_kwargs:
raise NotImplementedError(
"FITS reader does not understand any virtual_backend_kwargs"
)

# handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160
refs = KerchunkStoreRefs({"refs": process_file(filepath, **reader_options)})

Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/hdf/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> xr.Dataset:
if virtual_backend_kwargs:
raise NotImplementedError(
"HDF reader does not understand any virtual_backend_kwargs"
)

drop_variables, loadable_variables = check_for_collisions(
drop_variables,
loadable_variables,
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
from kerchunk.hdf import SingleHdf5ToZarr

if virtual_backend_kwargs:
raise NotImplementedError(
"HDF5 reader does not understand any virtual_backend_kwargs"
)

drop_variables, loadable_variables = check_for_collisions(
drop_variables,
loadable_variables,
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
"""Reads existing kerchunk references (in JSON or parquet) format."""

if virtual_backend_kwargs:
raise NotImplementedError(
"Kerchunk reader does not understand any virtual_backend_kwargs"
)

if group:
raise NotImplementedError()

Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/netcdf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
from kerchunk.netCDF3 import NetCDF3ToZarr

if virtual_backend_kwargs:
raise NotImplementedError(
"netcdf3 reader does not understand any virtual_backend_kwargs"
)

drop_variables, loadable_variables = check_for_collisions(
drop_variables,
loadable_variables,
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
if virtual_backend_kwargs:
raise NotImplementedError(
"TIFF reader does not understand any virtual_backend_kwargs"
)

from kerchunk.tiff import tiff_to_zarr

drop_variables, loadable_variables = check_for_collisions(
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/zarr_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
"""
Read a Zarr v3 store containing chunk manifests and return an xarray Dataset containing virtualized arrays.
This is experimental - chunk manifests are not part of the Zarr v3 Spec.
"""
if virtual_backend_kwargs:
raise NotImplementedError(
"Zarr_v3 reader does not understand any virtual_backend_kwargs"
)

storepath = Path(filepath)

if group:
Expand Down

0 comments on commit 5152624

Please sign in to comment.