From b22fbc8f72408fc45f9648bad76d8ba47df8b664 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 9 Apr 2026 15:55:00 +0200
Subject: [PATCH 1/8] Parquet IO: also use zoneinfo timezones by default even
 when pyarrow uses pytz

---
 pandas/core/frame.py              | 12 +++++
 pandas/core/generic.py            |  2 +-
 pandas/core/internals/managers.py |  8 ++++
 pandas/io/_util.py                | 74 ++++++++++++++++++++++++++++++-
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6133fba8cf8ec..9dd24df0e20fe 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5477,6 +5477,18 @@ def predicate(arr: ArrayLike) -> bool:
         mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
         return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)
 
+    def _select_dtypes_indices(self, dtype_class) -> np.ndarray:
+        """
+        Return the indices of the columns of a given dtype.
+
+        Currently only works given a class, so mostly useful for ExtensionDtypes.
+        """
+
+        def predicate(arr: ArrayLike) -> bool:
+            return isinstance(arr.dtype, dtype_class)
+
+        return self._mgr._get_data_subset_indices(predicate)
+
     def insert(
         self,
         loc: int,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 00a337f075f05..a9c00b73faf97 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7127,7 +7127,7 @@ def fillna(
             if axis == 1:
                 # Check that all columns in result have the same dtype
                 # otherwise don't bother with fillna and losing accurate dtypes
-                unique_dtypes = algos.unique(self._mgr.get_dtypes())
+                unique_dtypes = self._mgr.get_unique_dtypes()
                 if len(unique_dtypes) > 1:
                     raise ValueError(
                         "All columns must have the same dtype, but got dtypes: "
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index e82f2fb043d0d..771134f2081b4 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -336,6 +336,9 @@ def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
         blk = self.blocks[blkno]
         return any(blk is ref() for ref in mgr.blocks[blkno].refs.referenced_blocks)
 
+    def get_unique_dtypes(self) -> npt.NDArray[np.object_]:
+        return algos.unique([blk.dtype for blk in self.blocks])
+
     def get_dtypes(self) -> npt.NDArray[np.object_]:
         dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object)
         return dtypes.take(self.blknos)
@@ -656,6 +659,11 @@ def _get_data_subset(self, predicate: Callable) -> Self:
         blocks = [blk for blk in self.blocks if predicate(blk.values)]
         return self._combine(blocks)
 
+    def _get_data_subset_indices(self, predicate: Callable) -> np.ndarray:
+        blocks = [blk for blk in self.blocks if predicate(blk.values)]
+        indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
+        return indexer
+
     def get_bool_data(self) -> Self:
         """
         Select blocks that are bool-dtype and columns from object-dtype blocks
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index da9ac3913cbbd..72a50b1e25ce7 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import datetime as dt
 from typing import (
     TYPE_CHECKING,
     Literal,
@@ -10,6 +11,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
+from pandas._libs.tslibs import timezones
 from pandas.compat import (
     pa_version_under18p0,
     pa_version_under19p0,
@@ -35,6 +37,9 @@
     )
 
 
+pytz = import_optional_dependency("pytz", errors="ignore")
+
+
 def _arrow_dtype_mapping() -> dict:
     pa = import_optional_dependency("pyarrow")
     return {
@@ -120,7 +125,9 @@ def arrow_table_to_pandas(
         raise NotImplementedError
 
     df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
-    return _post_convert_dtypes(df, dtype_backend, dtype, names)
+    df = _post_convert_dtypes(df, dtype_backend, dtype, names)
+    df = _normalize_timezone_dtypes(df)
+    return df
 
 
 def _post_convert_dtypes(
@@ -189,3 +196,68 @@ def _post_convert_dtypes(
                     df[col] = df[col].astype(cat_dtype)
 
     return df
+
+
+def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
+    """
+    If the input tz is a pytz timezone, attempt to convert it to "default"
+    tzinfo object (zoneinfo or datetime.timezone).
+    """
+    if not type(tz).__module__.startswith("pytz"):
+        # isinstance(col.dtype.tz, pytz.BaseTzInfo) does not include
+        # fixed offsets
+        return tz
+
+    if timezones.is_utc(tz):
+        return timezones.maybe_get_tz("UTC")
+
+    if timezones.is_fixed_offset(tz):
+        # Convert pytz fixed offset to datetime.timezone
+        try:
+            offset = tz.utcoffset(None)
+            if offset is not None:
+                return dt.timezone(offset)
+        except Exception:
+            pass
+
+    zone = timezones.get_timezone(tz)
+    if isinstance(zone, str):
+        try:
+            return timezones.maybe_get_tz(zone)
+        except Exception:
+            # some pytz timezones might not be available for zoneinfo
+            pass
+
+    return tz
+
+
+def _normalize_timezone_index(index: pd.Index) -> pd.Index:
+    if isinstance(index, pd.MultiIndex):
+        levels = [_normalize_timezone_index(level) for level in index.levels]
+        return index.set_levels(levels)
+
+    if isinstance(index.dtype, pd.DatetimeTZDtype):
+        normalized_tz = _normalize_pytz_timezone(index.dtype.tz)
+        if normalized_tz is not index.dtype.tz:
+            return index.tz_convert(normalized_tz)
+
+    return index
+
+
+def _normalize_timezone_dtypes(df: pd.DataFrame) -> pd.DataFrame:
+    if pytz is not None:
+        # Convert any pytz timezones to zoneinfo / fixed offset timezones
+        if any(
+            isinstance(dtype, pd.DatetimeTZDtype)
+            for dtype in df._mgr.get_unique_dtypes()
+        ):
+            col_indices = df._select_dtypes_indices(pd.DatetimeTZDtype)
+            for i in col_indices:
+                col = df.iloc[:, i]
+                normalized_tz = _normalize_pytz_timezone(col.dtype.tz)
+                if normalized_tz is not col.dtype.tz:
+                    df.isetitem(i, col.dt.tz_convert(normalized_tz))
+
+    df.index = _normalize_timezone_index(df.index)
+    df.columns = _normalize_timezone_index(df.columns)
+    return df

From 524ff5853542f6b4a98dfbab8632edbab4fddc6d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 9 Apr 2026 17:53:30 +0200
Subject: [PATCH 2/8] fixup + update test for tzaware index now no longer
 returning pytz

---
 pandas/core/internals/managers.py |  2 +-
 pandas/tests/io/test_parquet.py   | 26 +-------------------------
 2 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 771134f2081b4..94b4f581c44db 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -337,7 +337,7 @@ def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
         return any(blk is ref() for ref in mgr.blocks[blkno].refs.referenced_blocks)
 
     def get_unique_dtypes(self) -> npt.NDArray[np.object_]:
-        return algos.unique([blk.dtype for blk in self.blocks])
+        return algos.unique(np.array([blk.dtype for blk in self.blocks], dtype=object))
 
     def get_dtypes(self) -> npt.NDArray[np.object_]:
         dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index c5922b6b5a9a4..aff16c58f8c28 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1068,31 +1068,7 @@ def test_timestamp_nanoseconds(self, pa, temp_file):
     def test_timezone_aware_index(self, pa, timezone_aware_date_list, temp_file):
         idx = 5 * [timezone_aware_date_list]
         df = pd.DataFrame(index=idx, data={"index_as_col": idx})
-
-        # see gh-36004
-        # compare time(zone) values only, skip their class:
-        # pyarrow always creates fixed offset timezones using pytz.FixedOffset()
-        # even if it was datetime.timezone() originally
-        #
-        # technically they are the same:
-        # they both implement datetime.tzinfo
-        # they both wrap datetime.timedelta()
-        # this use-case sets the resolution to 1 minute
-
-        expected = df[:]
-        if timezone_aware_date_list.tzinfo != datetime.UTC:
-            # pyarrow returns pytz.FixedOffset while pandas constructs datetime.timezone
-            # https://github.com/pandas-dev/pandas/issues/37286
-            try:
-                import pytz
-            except ImportError:
-                pass
-            else:
-                offset = df.index.tz.utcoffset(timezone_aware_date_list)
-                tz = pytz.FixedOffset(offset.total_seconds() / 60)
-                expected.index = expected.index.tz_convert(tz)
-                expected["index_as_col"] = expected["index_as_col"].dt.tz_convert(tz)
-        check_round_trip(df, temp_file, pa, check_dtype=False, expected=expected)
+        check_round_trip(df, temp_file, pa, check_dtype=False)
 
     def test_filter_row_groups(self, pa, temp_file):
         # https://github.com/pandas-dev/pandas/issues/26551

From 6cab7d79dca9ea986edd5a82cf13d540a81b3375 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 9 Apr 2026 21:51:22 +0200
Subject: [PATCH 3/8] fix normalize logic for static timezone

---
 pandas/io/_util.py                    | 16 +++++++---------
 pandas/tests/tslibs/test_timezones.py | 16 ++++++++++++++++
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 72a50b1e25ce7..95e84021ab9f7 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -211,21 +211,19 @@ def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
     if timezones.is_utc(tz):
         return timezones.maybe_get_tz("UTC")
 
-    if timezones.is_fixed_offset(tz):
-        # Convert pytz fixed offset to datetime.timezone
+    if tz.zone is not None:
         try:
-            offset = tz.utcoffset(None)
-            if offset is not None:
-                return dt.timezone(offset)
+            return timezones.maybe_get_tz(tz.zone)
         except Exception:
+            # some pytz timezones might not be available for zoneinfo
             pass
 
-    zone = timezones.get_timezone(tz)
-    if isinstance(zone, str):
+    if timezones.is_fixed_offset(tz):
+        # Convert pytz fixed offset to datetime.timezone
         try:
-            return timezones.maybe_get_tz(zone)
+            offset = tz.utcoffset(None)
+            return dt.timezone(offset)
         except Exception:
-            # some pytz timezones might not be available for zoneinfo
             pass
 
     return tz
diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py
index c48986c597356..33b05bc34eccd 100644
--- a/pandas/tests/tslibs/test_timezones.py
+++ b/pandas/tests/tslibs/test_timezones.py
@@ -6,6 +6,7 @@
 import subprocess
 import sys
 import textwrap
+import zoneinfo
 
 import dateutil.tz
 import pytest
@@ -191,3 +192,18 @@ def test_maybe_get_tz_offset_only():
 
     tz = timezones.maybe_get_tz("UTC-02:45")
     assert tz == timezone(-timedelta(hours=2, minutes=45))
+
+
+def test_normalize_pytz_timezone():
+    pytz = pytest.importorskip("pytz")
+
+    from pandas.io._util import _normalize_pytz_timezone
+
+    for tz, expected in [
+        (pytz.UTC, timezone.utc),
+        (pytz.FixedOffset(90), timezone(timedelta(minutes=90))),
+        (pytz.timezone("America/New_York"), zoneinfo.ZoneInfo("America/New_York")),
+        (pytz.timezone("Etc/GMT+1"), zoneinfo.ZoneInfo("Etc/GMT+1")),
+    ]:
+        result = _normalize_pytz_timezone(tz)
+        assert result == expected

From 6b1fcaf1017da433a00b38b0aeffcb4c1ff58a0c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 9 Apr 2026 21:52:49 +0200
Subject: [PATCH 4/8] update parser test for pyarrow engine

---
 pandas/tests/io/parser/test_parse_dates.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 7aed7acb8e50d..41890396192a3 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -217,14 +217,8 @@ def test_parse_tz_aware(all_parsers):
     expected = DataFrame(
         {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date")
     )
-    if parser.engine == "pyarrow":
-        pytz = pytest.importorskip("pytz")
-        expected_tz = pytz.utc
-        expected.index = expected.index.as_unit("s")
-    else:
-        expected_tz = timezone.utc
     tm.assert_frame_equal(result, expected)
-    assert result.index.tz is expected_tz
+    assert result.index.tz is timezone.utc
 
 
 @pytest.mark.parametrize("kwargs", [{}, {"index_col": "C"}])

From 55d4e4b17d7b7c9fecebf3a9b78f3c4d07d812bc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 9 Apr 2026 22:13:23 +0200
Subject: [PATCH 5/8] add docstring + link to pyarrow PR

---
 pandas/core/frame.py |  2 +-
 pandas/io/_util.py   | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9dd24df0e20fe..d30e475841d90 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5450,7 +5450,7 @@ def predicate(arr: ArrayLike) -> bool:
 
             return True
 
-        blk_dtypes = [blk.dtype for blk in self._mgr.blocks]
+        blk_dtypes = self._mgr.get_unique_dtypes()
         if (
             np.object_ in include
             and str not in include
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 95e84021ab9f7..c88ebbf645d21 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -231,8 +231,11 @@ def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
 
 def _normalize_timezone_index(index: pd.Index) -> pd.Index:
     if isinstance(index, pd.MultiIndex):
-        levels = [_normalize_timezone_index(level) for level in index.levels]
-        return index.set_levels(levels)
+        if any(isinstance(level.dtype, pd.DatetimeTZDtype) for level in index.levels):
+            levels = [_normalize_timezone_index(level) for level in index.levels]
+            return index.set_levels(levels)
+
+        return index
 
     if isinstance(index.dtype, pd.DatetimeTZDtype):
         normalized_tz = _normalize_pytz_timezone(index.dtype.tz)
@@ -243,6 +246,13 @@ def _normalize_timezone_index(index: pd.Index) -> pd.Index:
 
 
 def _normalize_timezone_dtypes(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    PyArrow uses pytz by default for timezones, but pandas uses
+    zoneinfo / datetime.timezone since pandas 3.0.
+
+    TODO: Starting with pyarrow 25, it will use zoneinfo by default, and then
+    this normalization can be skipped (https://github.com/apache/arrow/pull/49694).
+    """
     if pytz is not None:
         # Convert any pytz timezones to zoneinfo / fixed offset timezones
         if any(
@@ -258,4 +268,5 @@ def _normalize_timezone_dtypes(df: pd.DataFrame) -> pd.DataFrame:
 
     df.index = _normalize_timezone_index(df.index)
     df.columns = _normalize_timezone_index(df.columns)
+
     return df

From 6a11335c95d26b9b68df474680a43d15e5e1c62c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Apr 2026 10:43:20 +0200
Subject: [PATCH 6/8] fix expected unit in parser test

---
 pandas/tests/io/parser/test_parse_dates.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 41890396192a3..41effd4c2896e 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -217,6 +217,8 @@ def test_parse_tz_aware(all_parsers):
     expected = DataFrame(
         {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date")
     )
+    if parser.engine == "pyarrow":
+        expected.index = expected.index.as_unit("s")
     tm.assert_frame_equal(result, expected)
     assert result.index.tz is timezone.utc
 

From bca10e8f23f554676ac66bd2e6d447fdd6ab1471 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Apr 2026 10:45:15 +0200
Subject: [PATCH 7/8] fix/suppress typing failures

---
 pandas/io/_util.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index c88ebbf645d21..72a8b2e8f0ef6 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -5,6 +5,7 @@
     TYPE_CHECKING,
     Literal,
 )
+import zoneinfo
 
 import numpy as np
 
@@ -209,11 +210,11 @@ def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
         return tz
 
     if timezones.is_utc(tz):
-        return timezones.maybe_get_tz("UTC")
+        return dt.timezone.utc
 
-    if tz.zone is not None:
+    if tz.zone is not None:  # type: ignore[attr-defined]
         try:
-            return timezones.maybe_get_tz(tz.zone)
+            return zoneinfo.ZoneInfo(tz.zone)  # type: ignore[attr-defined]
         except Exception:
             # some pytz timezones might not be available for zoneinfo
             pass
@@ -222,7 +223,8 @@ def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
         # Convert pytz fixed offset to datetime.timezone
         try:
             offset = tz.utcoffset(None)
-            return dt.timezone(offset)
+            if offset is not None:
+                return dt.timezone(offset)
         except Exception:
             pass
 
@@ -240,7 +242,7 @@ def _normalize_timezone_index(index: pd.Index) -> pd.Index:
     if isinstance(index.dtype, pd.DatetimeTZDtype):
         normalized_tz = _normalize_pytz_timezone(index.dtype.tz)
         if normalized_tz is not index.dtype.tz:
-            return index.tz_convert(normalized_tz)
+            return index.tz_convert(normalized_tz)  # type: ignore[attr-defined]
 
     return index
 

From 0e5e05be79f262eef3ef6e2996c99f7c45eccf14 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Apr 2026 14:21:47 +0200
Subject: [PATCH 8/8] add whatsnew

---
 doc/source/whatsnew/v3.0.3.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.3.rst b/doc/source/whatsnew/v3.0.3.rst
index f8987257b4858..9b1b50a4ff114 100644
--- a/doc/source/whatsnew/v3.0.3.rst
+++ b/doc/source/whatsnew/v3.0.3.rst
@@ -8,6 +8,20 @@ including other versions of pandas.
 
 {{ header }}
 
+.. ---------------------------------------------------------------------------
+.. _whatsnew_303.enhancements:
+
+Enhancements
+~~~~~~~~~~~~
+- Starting with pandas 3.0.0, time zones are represented by default using the
+  standard library's :mod:`zoneinfo` module (or ``datetime.timezone`` for fixed
+  offsets) instead of using ``pytz`` (:ref:`release note <whatsnew_300.api_breaking.pytz>`).
+
+  The IO methods using ``pyarrow`` under the hood such as :func:`read_parquet`,
+  :func:`read_feather` and :func:`read_orc` (or :func:`read_csv` when specifying
+  ``engine="pyarrow"``) were still returning time zones using ``pytz``. Those have
+  now been updated to consistently use default ``zoneinfo`` time zones as well (:issue:`65134`).
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_303.regressions: