Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add virtual_backend_kwargs argument to open_virtual_dataset #315

Merged
merged 5 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ v1.1.1 (unreleased)
New Features
~~~~~~~~~~~~

- Add a ``virtual_backend_kwargs`` keyword argument to file readers and to ``open_virtual_dataset``, to allow reader-specific options to be passed down.
(:pull:`315`) By `Tom Nicholas <https://github.com/TomNicholas>`_.

Breaking changes
~~~~~~~~~~~~~~~~

Expand Down
4 changes: 4 additions & 0 deletions virtualizarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def open_virtual_dataset(
cftime_variables: Iterable[str] | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_array_class=ManifestArray,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
backend: Optional[VirtualBackend] = None,
) -> Dataset:
Expand Down Expand Up @@ -147,6 +148,8 @@ def open_virtual_dataset(
virtual_array_class
Virtual array class to use to represent the references to the chunks in each on-disk array.
Currently can only be ManifestArray, but once VirtualZarrArray is implemented the default should be changed to that.
virtual_backend_kwargs: dict, default is None
Dictionary of keyword arguments passed down to the backend reader that opens the file. Allows passing arguments specific to certain readers.
reader_options: dict, default {}
Dict passed into Kerchunk file readers, to allow reading from remote filesystems.
Note: Each Kerchunk file reader has distinct arguments, so ensure reader_options match selected Kerchunk reader arguments.
Expand Down Expand Up @@ -201,6 +204,7 @@ def open_virtual_dataset(
loadable_variables=loadable_variables,
decode_times=decode_times,
indexes=indexes,
virtual_backend_kwargs=virtual_backend_kwargs,
reader_options=reader_options,
)

Expand Down
2 changes: 2 additions & 0 deletions virtualizarr/readers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
raise NotImplementedError()
Expand All @@ -180,6 +181,7 @@ def open_virtual_datatree(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> DataTree:
raise NotImplementedError()
6 changes: 6 additions & 0 deletions virtualizarr/readers/dmrpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,19 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
loadable_variables, drop_variables = check_for_collisions(
drop_variables=drop_variables,
loadable_variables=loadable_variables,
)

if virtual_backend_kwargs:
raise NotImplementedError(
"DMR++ reader does not understand any virtual_backend_kwargs"
)

if loadable_variables != [] or decode_times or indexes is None:
raise NotImplementedError(
"Specifying `loadable_variables` or auto-creating indexes with `indexes=None` is not supported for dmrpp files."
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
from kerchunk.fits import process_file

if virtual_backend_kwargs:
raise NotImplementedError(
"FITS reader does not understand any virtual_backend_kwargs"
)

# handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160
refs = KerchunkStoreRefs({"refs": process_file(filepath, **reader_options)})

Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/hdf/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> xr.Dataset:
if virtual_backend_kwargs:
raise NotImplementedError(
"HDF reader does not understand any virtual_backend_kwargs"
)

drop_variables, loadable_variables = check_for_collisions(
drop_variables,
loadable_variables,
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
from kerchunk.hdf import SingleHdf5ToZarr

if virtual_backend_kwargs:
raise NotImplementedError(
"HDF5 reader does not understand any virtual_backend_kwargs"
)

drop_variables, loadable_variables = check_for_collisions(
drop_variables,
loadable_variables,
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
"""Reads existing kerchunk references (in JSON or parquet) format."""

if virtual_backend_kwargs:
raise NotImplementedError(
"Kerchunk reader does not understand any virtual_backend_kwargs"
)

if group:
raise NotImplementedError()

Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/netcdf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
from kerchunk.netCDF3 import NetCDF3ToZarr

if virtual_backend_kwargs:
raise NotImplementedError(
"netcdf3 reader does not understand any virtual_backend_kwargs"
)

drop_variables, loadable_variables = check_for_collisions(
drop_variables,
loadable_variables,
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
if virtual_backend_kwargs:
raise NotImplementedError(
"TIFF reader does not understand any virtual_backend_kwargs"
)

from kerchunk.tiff import tiff_to_zarr

drop_variables, loadable_variables = check_for_collisions(
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/readers/zarr_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
"""
Read a Zarr v3 store containing chunk manifests and return an xarray Dataset containing virtualized arrays.

This is experimental - chunk manifests are not part of the Zarr v3 Spec.
"""
if virtual_backend_kwargs:
raise NotImplementedError(
"Zarr_v3 reader does not understand any virtual_backend_kwargs"
)

storepath = Path(filepath)

if group:
Expand Down