Skip to content

Commit

Permalink
Address latest pandas-related upstream test failures (pydata#9081)
Browse files Browse the repository at this point in the history
* Address pandas-related upstream test failures

* Address more warnings

* Don't lose coverage for pandas < 3

* Address one more warning

* Fix accidental change from MS to ME

* Use datetime64[ns] arrays

* Switch to @pytest.mark.filterwarnings
  • Loading branch information
spencerkclark authored Jun 10, 2024
1 parent ccebef0 commit ef709df
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 17 deletions.
10 changes: 9 additions & 1 deletion xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,21 @@
from xarray.core.types import InclusiveOptions, SideOptions


def _nanosecond_precision_timestamp(*args, **kwargs):
    """Construct a ``pd.Timestamp`` and return it at nanosecond precision.

    All positional and keyword arguments are forwarded unchanged to
    ``pd.Timestamp``; the resulting object is converted with
    ``as_unit("ns")`` before being returned.
    """
    # From pandas 3.0 onward the datetime precision is inferred instead of
    # always being nanoseconds. xarray does not yet support non-nanosecond
    # precision times, so the unit is pinned explicitly through this wrapper.
    timestamp = pd.Timestamp(*args, **kwargs)
    return timestamp.as_unit("ns")


def get_date_type(calendar, use_cftime=True):
"""Return the cftime date type for a given calendar name."""
if cftime is None:
raise ImportError("cftime is required for dates with non-standard calendars")
else:
if _is_standard_calendar(calendar) and not use_cftime:
return pd.Timestamp
return _nanosecond_precision_timestamp

calendars = {
"noleap": cftime.DatetimeNoLeap,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def test_roundtrip_string_encoded_characters(self) -> None:
assert actual["x"].encoding["_Encoding"] == "ascii"

def test_roundtrip_numpy_datetime_data(self) -> None:
times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"])
times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns")
expected = Dataset({"t": ("t", times), "t0": times[0]})
kwargs = {"encoding": {"t0": {"units": "days since 1950-01-01"}}}
with self.roundtrip(expected, save_kwargs=kwargs) as actual:
Expand Down
9 changes: 6 additions & 3 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,11 +538,14 @@ def test_infer_datetime_units(freq, units) -> None:
["dates", "expected"],
[
(
pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"]),
pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"], unit="ns"),
"days since 1900-01-01 00:00:00",
),
(pd.to_datetime(["NaT", "1900-01-01"]), "days since 1900-01-01 00:00:00"),
(pd.to_datetime(["NaT"]), "days since 1970-01-01 00:00:00"),
(
pd.to_datetime(["NaT", "1900-01-01"], unit="ns"),
"days since 1900-01-01 00:00:00",
),
(pd.to_datetime(["NaT"], unit="ns"), "days since 1970-01-01 00:00:00"),
],
)
def test_infer_datetime_units_with_NaT(dates, expected) -> None:
Expand Down
9 changes: 6 additions & 3 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from datetime import datetime
from itertools import product

import numpy as np
Expand Down Expand Up @@ -229,8 +228,12 @@ def test_lexicographic_sort_string_coords(self):
assert concat_dims == ["simulation"]

def test_datetime_coords(self):
ds0 = Dataset({"time": [datetime(2000, 3, 6), datetime(2001, 3, 7)]})
ds1 = Dataset({"time": [datetime(1999, 1, 1), datetime(1999, 2, 4)]})
ds0 = Dataset(
{"time": np.array(["2000-03-06", "2000-03-07"], dtype="datetime64[ns]")}
)
ds1 = Dataset(
{"time": np.array(["1999-01-01", "1999-02-04"], dtype="datetime64[ns]")}
)

expected = {(0,): ds1, (1,): ds0}
actual, concat_dims = _infer_concat_order_from_coords([ds0, ds1])
Expand Down
2 changes: 2 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3642,6 +3642,7 @@ def test_to_and_from_dict(
actual_no_data = da.to_dict(data=False, encoding=encoding)
assert expected_no_data == actual_no_data

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_to_and_from_dict_with_time_dim(self) -> None:
x = np.random.randn(10, 3)
t = pd.date_range("20130101", periods=10)
Expand All @@ -3650,6 +3651,7 @@ def test_to_and_from_dict_with_time_dim(self) -> None:
roundtripped = DataArray.from_dict(da.to_dict())
assert_identical(da, roundtripped)

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_to_and_from_dict_with_nan_nat(self) -> None:
y = np.random.randn(10, 3)
y[2] = np.nan
Expand Down
5 changes: 2 additions & 3 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,7 @@ def test_groupby_da_datetime() -> None:
times = pd.date_range("2000-01-01", periods=4)
foo = xr.DataArray([1, 2, 3, 4], coords=dict(time=times), dims="time")
# create test index
dd = times.to_pydatetime()
reference_dates = [dd[0], dd[2]]
reference_dates = [times[0], times[2]]
labels = reference_dates[0:1] * 2 + reference_dates[1:2] * 2
ind = xr.DataArray(
labels, coords=dict(time=times), dims="time", name="reference_date"
Expand Down Expand Up @@ -1881,7 +1880,7 @@ def test_resample_first(self) -> None:
array = Dataset({"time": times})["time"]
actual = array.resample(time="1D").last()
expected_times = pd.to_datetime(
["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"]
["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"], unit="ns"
)
expected = DataArray(expected_times, [("time", times[::4])], name="time")
assert_identical(expected, actual)
Expand Down
7 changes: 3 additions & 4 deletions xarray/tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import math
from collections.abc import Generator, Hashable
from copy import copy
from datetime import date, datetime, timedelta
from datetime import date, timedelta
from typing import Any, Callable, Literal

import numpy as np
Expand Down Expand Up @@ -2912,9 +2912,8 @@ def setUp(self) -> None:
"""
month = np.arange(1, 13, 1)
data = np.sin(2 * np.pi * month / 12.0)

darray = DataArray(data, dims=["time"])
darray.coords["time"] = np.array([datetime(2017, m, 1) for m in month])
times = pd.date_range(start="2017-01-01", freq="MS", periods=12)
darray = DataArray(data, dims=["time"], coords=[times])

self.darray = darray

Expand Down
6 changes: 4 additions & 2 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
assert_equal,
assert_identical,
assert_no_warnings,
has_pandas_3,
raise_if_dask_computes,
requires_bottleneck,
requires_cupy,
Expand Down Expand Up @@ -252,6 +253,7 @@ def test_0d_object_array_with_list(self):
assert_array_equal(x[0].data, listarray.squeeze())
assert_array_equal(x.squeeze().data, listarray.squeeze())

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_index_and_concat_datetime(self):
# regression test for #125
date_range = pd.date_range("2011-09-01", periods=10)
Expand Down Expand Up @@ -2942,8 +2944,8 @@ def test_from_pint_wrapping_dask(self, Var):
(np.array([np.datetime64("2000-01-01", "ns")]), False),
(np.array([np.datetime64("2000-01-01", "s")]), True),
(pd.date_range("2000", periods=1), False),
(datetime(2000, 1, 1), False),
(np.array([datetime(2000, 1, 1)]), False),
(datetime(2000, 1, 1), has_pandas_3),
(np.array([datetime(2000, 1, 1)]), has_pandas_3),
(pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), False),
(
pd.Series(
Expand Down

0 comments on commit ef709df

Please sign in to comment.