diff --git a/_stubtest/allowlist.txt b/_stubtest/allowlist.txt index dcf7d3ba337..eabb0d7999b 100644 --- a/_stubtest/allowlist.txt +++ b/_stubtest/allowlist.txt @@ -107,7 +107,9 @@ xarray\.core\.dataset\.DatasetResample$ xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.date_type$ xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.day$ +xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.day_of_week$ xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.dayofweek$ +xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.day_of_year$ xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.dayofyear$ xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.days_in_month$ xarray\.core\.(dataarray|dataset)\.CFTimeIndex\.hour$ diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index cabca85cdbc..df851eec990 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -114,7 +114,9 @@ core.accessor_dt.DatetimeAccessor.date core.accessor_dt.DatetimeAccessor.day core.accessor_dt.DatetimeAccessor.dayofweek + core.accessor_dt.DatetimeAccessor.day_of_week core.accessor_dt.DatetimeAccessor.dayofyear + core.accessor_dt.DatetimeAccessor.day_of_year core.accessor_dt.DatetimeAccessor.days_in_month core.accessor_dt.DatetimeAccessor.daysinmonth core.accessor_dt.DatetimeAccessor.hour @@ -409,6 +411,8 @@ CFTimeIndex.ceil CFTimeIndex.contains CFTimeIndex.copy + CFTimeIndex.day_of_week + CFTimeIndex.day_of_year CFTimeIndex.days_in_month CFTimeIndex.delete CFTimeIndex.difference diff --git a/doc/api/dataarray.rst b/doc/api/dataarray.rst index 9d4e81c8677..e742a1a2ff5 100644 --- a/doc/api/dataarray.rst +++ b/doc/api/dataarray.rst @@ -272,8 +272,10 @@ Datetimelike properties DataArray.dt.second DataArray.dt.microsecond DataArray.dt.nanosecond + DataArray.dt.day_of_week DataArray.dt.dayofweek DataArray.dt.weekday + DataArray.dt.day_of_year DataArray.dt.dayofyear DataArray.dt.quarter DataArray.dt.days_in_month diff --git a/doc/getting-started-guide/why-xarray.rst b/doc/getting-started-guide/why-xarray.rst index 0dc3586fc34..11fca4aeb67 100644 --- a/doc/getting-started-guide/why-xarray.rst +++ b/doc/getting-started-guide/why-xarray.rst @@ -28,7 +28,7 @@ powerful and concise interface. For example: dimensions (array broadcasting) based on dimension names, not shape. - Easily use the `split-apply-combine `_ paradigm with ``groupby``: - ``x.groupby('time.dayofyear').mean()``. + ``x.groupby('time.day_of_year').mean()``. - Database-like alignment based on coordinate labels that smoothly handles missing values: ``x, y = xr.align(x, y, join='outer')``. - Keep track of arbitrary metadata in the form of a Python dictionary: diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index d3f13c2f03c..bebe55ceca6 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -150,15 +150,15 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. jupyter-execute:: - ds.time.dt.dayofweek + ds.time.dt.day_of_week The ``.dt`` accessor works on both coordinate dimensions as well as multi-dimensional data. Xarray also supports a notion of "virtual" or "derived" coordinates for `datetime components`__ implemented by pandas, including "year", "month", -"day", "hour", "minute", "second", "dayofyear", "week", "dayofweek", "weekday" -and "quarter": +"day", "hour", "minute", "second", "day_of_year", "week", "day_of_week", and +"quarter": __ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components @@ -168,7 +168,7 @@ __ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. jupyter-execute:: - ds["time.dayofyear"] + ds["time.day_of_year"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 3b31fadaa70..7cc1ae69d61 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -184,8 +184,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", - "season", "dayofyear", "dayofweek", and "days_in_month") with the addition - of "calendar", absent from pandas: + "season", "day_of_year", "day_of_week", and "days_in_month") with the + addition of "calendar", absent from pandas: .. jupyter-execute:: @@ -201,11 +201,11 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. jupyter-execute:: - da.time.dt.dayofyear + da.time.dt.day_of_year .. jupyter-execute:: - da.time.dt.dayofweek + da.time.dt.day_of_week .. jupyter-execute:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 66bd461157e..709a16282a4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -19,6 +19,16 @@ New Features By `Alfonso Ladino `_. - Added complex dtype support to FillValueCoder for the Zarr backend. (:pull:`11151`) By `Max Jones `_. +- Following pandas, xarray's + :py:class:`~xarray.core.accessor_dt.DatetimeAccessor` now supports + :py:attr:`~xarray.core.accessor_dt.DatetimeAccessor.day_of_week` and + :py:attr:`~xarray.core.accessor_dt.DatetimeAccessor.day_of_year` attributes, + which are alternative names for the existing + :py:attr:`~xarray.core.accessor_dt.DatetimeAccessor.dayofweek` and + :py:attr:`~xarray.core.accessor_dt.DatetimeAccessor.dayofyear` attributes. + These alternative attributes have similarly been added to + :py:class:`~xarray.CFTimeIndex` (:pull:`11270`). By `Spencer Clark + `_. Breaking Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index a6f0254a42d..767bf0af2f1 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -249,7 +249,7 @@ def _interpolate_day_of_year(times, target_calendar): source_calendar = times.dt.calendar return np.round( _days_in_year(times.dt.year, target_calendar) - * times.dt.dayofyear + * times.dt.day_of_year / _days_in_year(times.dt.year, source_calendar) ).astype(int) @@ -272,7 +272,7 @@ def _random_day_of_year(time, target_calendar, use_cftime): new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) if _days_in_year(year, source_calendar) == 366: new_doy = np.insert(new_doy, 60, -1) - return new_doy[time.dt.dayofyear - 1] + return new_doy[time.dt.day_of_year - 1] def _convert_to_new_calendar_with_new_day_of_year( diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 154c39198e0..d9fc4993bd1 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1623,10 +1623,18 @@ def date_range_like(source, calendar, use_cftime=None): end = convert_time_or_go_back(source_end, date_type) # For the cases where the source ends on the end of the month, we expect the same in the new calendar. - if source_end.day == source_end.daysinmonth and isinstance( + if isinstance(source_end, pd.Timestamp): + source_end_days_in_month = source_end.days_in_month + else: + source_end_days_in_month = source_end.daysinmonth + if isinstance(end, pd.Timestamp): + end_days_in_month = end.days_in_month + else: + end_days_in_month = end.daysinmonth + if source_end.day == source_end_days_in_month and isinstance( freq_as_offset, YearEnd | QuarterEnd | MonthEnd | Day ): - end = end.replace(day=end.daysinmonth) + end = end.replace(day=end_days_in_month) return date_range( start=start.isoformat(), diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 1aa56317b2d..8a87e26e903 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -127,7 +127,11 @@ def get_date_field(datetimes, field): return np.array([getattr(date, field) for date in datetimes], dtype=np.int64) -def _field_accessor(name, docstring=None, min_cftime_version="0.0"): +def _field_accessor( + name: str, + docstring: str | None = None, + min_cftime_version: str = "0.0", +): """Adapted from pandas.tseries.index._field_accessor""" def f(self, min_cftime_version=min_cftime_version): @@ -249,9 +253,21 @@ class CFTimeIndex(pd.Index): second = _field_accessor("second", "The seconds of the datetime") microsecond = _field_accessor("microsecond", "The microseconds of the datetime") dayofyear = _field_accessor( + "dayofyr", + "The ordinal day of year of the datetime", + "1.0.2.1", + ) + dayofweek = _field_accessor( + "dayofwk", + "The day of week of the datetime", + "1.0.2.1", + ) + day_of_year = _field_accessor( "dayofyr", "The ordinal day of year of the datetime", "1.0.2.1" ) - dayofweek = _field_accessor("dayofwk", "The day of week of the datetime", "1.0.2.1") + day_of_week = _field_accessor( + "dayofwk", "The day of week of the datetime", "1.0.2.1" + ) days_in_month = _field_accessor( "daysinmonth", "The number of days in the month of the datetime", "1.1.0.0" ) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 5e0d0fc5333..f8fda9b35eb 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -338,8 +338,8 @@ class DatetimeAccessor(TimeAccessor[T_DataArray]): * time (time) datetime64[us] 80B 2000-01-01 2000-01-02 ... 2000-01-10 >>> ts.dt # doctest: +ELLIPSIS - >>> ts.dt.dayofyear - Size: 80B + >>> ts.dt.day_of_year + Size: 80B array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) Coordinates: * time (time) datetime64[us] 80B 2000-01-01 2000-01-02 ... 2000-01-10 @@ -466,16 +466,19 @@ def weekofyear(self) -> DataArray: week = weekofyear @property - def dayofweek(self) -> T_DataArray: + def day_of_week(self) -> T_DataArray: """The day of the week with Monday=0, Sunday=6""" - return self._date_field("dayofweek", np.int64) + return self._date_field("day_of_week", np.int64) - weekday = dayofweek + dayofweek = day_of_week + weekday = day_of_week @property - def dayofyear(self) -> T_DataArray: + def day_of_year(self) -> T_DataArray: """The ordinal day of the year""" - return self._date_field("dayofyear", np.int64) + return self._date_field("day_of_year", np.int64) + + dayofyear = day_of_year @property def quarter(self) -> T_DataArray: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c49a41de108..e9aaed119a9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6966,21 +6966,21 @@ def groupby( 1.826e+03], shape=(1827,)) Coordinates: * time (time) datetime64[us] 15kB 2000-01-01 2000-01-02 ... 2004-12-31 - >>> da.groupby("time.dayofyear") - da.groupby("time.dayofyear").mean("time") + >>> da.groupby("time.day_of_year") - da.groupby("time.day_of_year").mean("time") Size: 15kB array([-730.8, -730.8, -730.8, ..., 730.2, 730.2, 730.5], shape=(1827,)) Coordinates: - * time (time) datetime64[us] 15kB 2000-01-01 2000-01-02 ... 2004-12-31 - dayofyear (time) int64 15kB 1 2 3 4 5 6 7 8 ... 360 361 362 363 364 365 366 + * time (time) datetime64[us] 15kB 2000-01-01 2000-01-02 ... 2004-12-31 + day_of_year (time) int64 15kB 1 2 3 4 5 6 7 ... 360 361 362 363 364 365 366 Use a ``Grouper`` object to be more explicit - >>> da.coords["dayofyear"] = da.time.dt.dayofyear - >>> da.groupby(dayofyear=xr.groupers.UniqueGrouper()).mean() - Size: 3kB + >>> da.coords["day_of_year"] = da.time.dt.day_of_year + >>> da.groupby(day_of_year=xr.groupers.UniqueGrouper()).mean() + Size: 3kB array([ 730.8, 731.8, 732.8, ..., 1093.8, 1094.8, 1095.5]) Coordinates: - * dayofyear (dayofyear) int64 3kB 1 2 3 4 5 6 7 ... 361 362 363 364 365 366 + * day_of_year (day_of_year) int64 3kB 1 2 3 4 5 6 ... 361 362 363 364 365 366 >>> da = xr.DataArray( ... data=np.arange(12).reshape((4, 3)), diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 061898296e6..730323d8370 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -56,13 +56,11 @@ def setup(self): "nanosecond", "week", "weekofyear", - "dayofweek", - "weekday", - "dayofyear", + "day_of_week", + "day_of_year", "quarter", "date", "time", - "daysinmonth", "days_in_month", "is_month_start", "is_month_end", @@ -177,9 +175,8 @@ def test_not_datetime_type(self) -> None: "nanosecond", "week", "weekofyear", - "dayofweek", - "weekday", - "dayofyear", + "day_of_week", + "day_of_year", "quarter", "date", "time", @@ -441,7 +438,7 @@ def times_3d(times): @requires_cftime @pytest.mark.parametrize( - "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] + "field", ["year", "month", "day", "hour", "day_of_year", "day_of_week"] ) def test_field_access(data, field) -> None: result = getattr(data.time.dt, field) @@ -533,7 +530,7 @@ def test_cftime_strftime_access(data) -> None: @requires_cftime @requires_dask @pytest.mark.parametrize( - "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] + "field", ["year", "month", "day", "hour", "day_of_year", "day_of_week"] ) def test_dask_field_access_1d(data, field) -> None: import dask.array as da @@ -553,7 +550,7 @@ def test_dask_field_access_1d(data, field) -> None: @requires_cftime @requires_dask @pytest.mark.parametrize( - "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] + "field", ["year", "month", "day", "hour", "day_of_year", "day_of_week"] ) def test_dask_field_access(times_3d, data, field) -> None: import dask.array as da diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 927bebd3f5a..87093bf0786 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -159,7 +159,7 @@ def test_convert_calendar_360_days_random(): # Ensure that added days are evenly distributed in the 5 fifths of each year conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan) conv = conv.where(conv.isnull(), drop=True) - nandoys = conv.time.dt.dayofyear[:366] + nandoys = conv.time.dt.day_of_year[:366] assert all(nandoys < np.array([74, 147, 220, 293, 366])) assert all(nandoys > np.array([0, 73, 146, 219, 292])) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 8dfb357ebca..5011f1e788b 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1380,19 +1380,19 @@ def test_calendar_year_length( @pytest.mark.parametrize("freq", ["YE", "ME", "D"]) def test_dayofweek_after_cftime(freq: str) -> None: - result = date_range("2000-02-01", periods=3, freq=freq, use_cftime=True).dayofweek + result = date_range("2000-02-01", periods=3, freq=freq, use_cftime=True).day_of_week # TODO: remove once requiring pandas 2.2+ freq = _new_to_legacy_freq(freq) - expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek + expected = pd.date_range("2000-02-01", periods=3, freq=freq).day_of_week np.testing.assert_array_equal(result, expected) @pytest.mark.parametrize("freq", ["YE", "ME", "D"]) def test_dayofyear_after_cftime(freq: str) -> None: - result = date_range("2000-02-01", periods=3, freq=freq, use_cftime=True).dayofyear + result = date_range("2000-02-01", periods=3, freq=freq, use_cftime=True).day_of_year # TODO: remove once requiring pandas 2.2+ freq = _new_to_legacy_freq(freq) - expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofyear + expected = pd.date_range("2000-02-01", periods=3, freq=freq).day_of_year np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8aa79be458e..7f6f391fa6b 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -315,8 +315,8 @@ def test_cftimeindex_field_accessors(index, field, expected): "minute", "second", "microsecond", - "dayofyear", - "dayofweek", + "day_of_year", + "day_of_week", "days_in_month", ], ) @@ -329,16 +329,18 @@ def test_empty_cftimeindex_field_accessors(field): @requires_cftime -def test_cftimeindex_dayofyear_accessor(index): - result = index.dayofyear +@pytest.mark.parametrize("field", ["day_of_year", "dayofyear"]) +def test_cftimeindex_dayofyear_accessor(index, field): + result = getattr(index, field) expected = np.array([date.dayofyr for date in index], dtype=np.int64) assert_array_equal(result, expected) assert result.dtype == expected.dtype @requires_cftime -def test_cftimeindex_dayofweek_accessor(index): - result = index.dayofweek +@pytest.mark.parametrize("field", ["day_of_week", "dayofweek"]) +def test_cftimeindex_dayofweek_accessor(index, field): + result = getattr(index, field) expected = np.array([date.dayofwk for date in index], dtype=np.int64) assert_array_equal(result, expected) assert result.dtype == expected.dtype diff --git a/xarray/tests/test_cupy.py b/xarray/tests/test_cupy.py index 94776902c11..29b7055ca30 100644 --- a/xarray/tests/test_cupy.py +++ b/xarray/tests/test_cupy.py @@ -21,7 +21,7 @@ def toy_weather_data(): """ np.random.seed(123) times = pd.date_range("2000-01-01", "2001-12-31", name="time") - annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) + annual_cycle = np.sin(2 * np.pi * (times.day_of_year.values / 365.25 - 0.28)) base = 10 + 15 * annual_cycle.reshape(-1, 1) tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5e514cc9767..02a823e5295 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1481,8 +1481,8 @@ def test_virtual_time_components(self) -> None: dates = pd.date_range("2000-01-01", periods=10) da = DataArray(np.arange(1, 11), [("time", dates)]) - assert_array_equal(da["time.dayofyear"], da.values) - assert_array_equal(da.coords["time.dayofyear"], da.values) + assert_array_equal(da["time.day_of_year"], da.values) + assert_array_equal(da.coords["time.day_of_year"], da.values) def test_coords(self) -> None: # use int64 to ensure repr() consistency on windows diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index cad40e78821..c441450cd8d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1737,7 +1737,7 @@ def test_sel(self) -> None: times = pd.date_range("2000-01-01", periods=3) assert_equal(data.isel(time=slice(3)), data.sel(time=times)) assert_equal( - data.isel(time=slice(3)), data.sel(time=(data["time.dayofyear"] <= 3)) + data.isel(time=slice(3)), data.sel(time=(data["time.day_of_year"] <= 3)) ) td = pd.to_timedelta(np.arange(3), unit="days") @@ -4580,11 +4580,11 @@ def test_virtual_variables_time(self) -> None: assert_array_equal(data["time.month"].values, index.month) assert_array_equal(data["time.season"].values, "DJF") # test virtual variable math - assert_array_equal(data["time.dayofyear"] + 1, 2 + np.arange(20)) - assert_array_equal(np.sin(data["time.dayofyear"]), np.sin(1 + np.arange(20))) + assert_array_equal(data["time.day_of_year"] + 1, 2 + np.arange(20)) + assert_array_equal(np.sin(data["time.day_of_year"]), np.sin(1 + np.arange(20))) # ensure they become coordinates - expected = Dataset({}, {"dayofyear": data["time.dayofyear"]}) - actual = data[["time.dayofyear"]] + expected = Dataset({}, {"day_of_year": data["time.day_of_year"]}) + actual = data[["time.day_of_year"]] assert_equal(expected, actual) # non-coordinate variables ds = Dataset({"t": ("x", pd.date_range("2000-01-01", periods=3))}) @@ -4607,9 +4607,10 @@ def test_time_season(self) -> None: def test_slice_virtual_variable(self) -> None: data = create_test_data() assert_equal( - data["time.dayofyear"][:10].variable, Variable(["time"], 1 + np.arange(10)) + data["time.day_of_year"][:10].variable, + Variable(["time"], 1 + np.arange(10)), ) - assert_equal(data["time.dayofyear"][0].variable, Variable([], 1)) + assert_equal(data["time.day_of_year"][0].variable, Variable([], 1)) def test_setitem(self) -> None: # assign a variable diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 8f1358b755f..5160cc29066 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -95,7 +95,7 @@ def multiindex() -> xr.Dataset: @pytest.fixture def dataset() -> xr.Dataset: times = pd.date_range("2000-01-01", "2001-12-31", name="time") - annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) + annual_cycle = np.sin(2 * np.pi * (times.day_of_year.values / 365.25 - 0.28)) base = 10 + 15 * annual_cycle.reshape(-1, 1) tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index fcf1124c18e..49f01a4bcb8 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2921,7 +2921,7 @@ def reset(self): def test_weather_data_resample(use_flox): # from the docs times = pd.date_range("2000-01-01", "2001-12-31", name="time") - annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) + annual_cycle = np.sin(2 * np.pi * (times.day_of_year.values / 365.25 - 0.28)) base = 10 + 15 * annual_cycle.reshape(-1, 1) tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)