23 changes: 23 additions & 0 deletions doc/source/user_guide/timeseries.rst
@@ -241,6 +241,19 @@ inferred frequency upon creation:

pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer")

In most cases, parsing strings to datetimes (with any of :func:`to_datetime`, :class:`DatetimeIndex`, or :class:`Timestamp`) produces objects with microsecond ("us") unit. The exception is strings with nanosecond precision, in which case the result has "ns" unit:

.. ipython:: python

pd.to_datetime(["2016-01-01 02:03:04"]).unit
pd.to_datetime(["2016-01-01 02:03:04.123"]).unit
pd.to_datetime(["2016-01-01 02:03:04.123456"]).unit
pd.to_datetime(["2016-01-01 02:03:04.123456789"]).unit

.. versionchanged:: 3.0.0

Previously, :func:`to_datetime` and :class:`DatetimeIndex` would always parse strings to "ns" unit. During pandas 2.x, :class:`Timestamp` could give any of "s", "ms", "us", or "ns" depending on the specificity of the input string.
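
Independent of the parsing default, a unit can be requested explicitly; a minimal sketch (assuming pandas >= 2.0, where ``as_unit`` is available on both :class:`Timestamp` and :class:`DatetimeIndex`):

```python
import pandas as pd

# Normalize to a specific unit regardless of how the string parses.
ts = pd.Timestamp("2016-01-01 02:03:04")
print(ts.as_unit("us").unit)  # "us"
print(ts.as_unit("ns").unit)  # "ns"

# DatetimeIndex supports the same conversion.
idx = pd.to_datetime(["2016-01-01 02:03:04"])
print(idx.as_unit("ns").unit)  # "ns"
```

This makes code robust to the version-dependent parsing behavior described above.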

.. _timeseries.converting.format:

Providing a format argument
@@ -379,6 +392,16 @@ We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by

(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

Another common way to perform this conversion is to convert directly to an integer dtype. Note that the exact integers this produces will depend on the specific unit
or resolution of the datetime64 dtype:

.. ipython:: python

stamps.astype(np.int64)
stamps.astype("datetime64[s]").astype(np.int64)
stamps.astype("datetime64[ms]").astype(np.int64)
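
The two conversion routes described above agree when the unit is matched; a small self-contained check (the dates and epoch values here are illustrative, not from the patch):

```python
import numpy as np
import pandas as pd

stamps = pd.to_datetime(["2000-01-01", "2000-01-02"])

# Epoch seconds via subtraction and floor division...
via_sub = (stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
# ...and via casting to a second-resolution dtype, then to integers.
via_cast = stamps.astype("datetime64[s]").astype(np.int64)

print(list(via_sub))   # [946684800, 946771200]
print(list(via_cast))  # [946684800, 946771200]
```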


.. _timeseries.origin:

Using the ``origin`` parameter
6 changes: 5 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -358,7 +358,7 @@ When passing strings, the resolution will depend on the precision of the string,
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype
Out[5]: dtype('<M8[ns]')

The inferred resolution now matches that of the input strings:
The inferred resolution now matches that of the input strings for nanosecond-precision strings, otherwise defaulting to microseconds:

.. ipython:: python

@@ -367,13 +367,17 @@ The inferred resolution now matches that of the input strings:
In [4]: pd.to_datetime(["2024-03-22 11:43:01.002003"]).dtype
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype

This is also a change for the :class:`Timestamp` constructor with string input, which in version 2.x could give second or millisecond unit depending on the string, a behavior users generally disliked (:issue:`52653`).

In cases with mixed-resolution inputs, the highest resolution is used:

.. code-block:: ipython

In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
Out[2]: dtype('<M8[ns]')

Many users will now get "M8[us]" dtype data in cases where they used to get "M8[ns]". For most use cases they should not notice a difference. One notable exception is converting to integers, which will produce values 1000 times smaller.
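
A minimal sketch of that 1000x effect (version-agnostic, since both units are requested explicitly here):

```python
import numpy as np
import pandas as pd

ser = pd.Series([pd.Timestamp("2000-01-01")])

# The same instant as integers, under microsecond vs. nanosecond units.
as_us = ser.astype("datetime64[us]").astype(np.int64).iloc[0]
as_ns = ser.astype("datetime64[ns]").astype(np.int64).iloc[0]

print(as_us)           # 946684800000000
print(as_ns)           # 946684800000000000
print(as_ns // as_us)  # 1000
```

Code that fed such integers into downstream systems assuming nanoseconds will need an explicit ``astype("datetime64[ns]")`` first.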

.. _whatsnew_300.api_breaking.concat_datetime_sorting:

:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
@@ -623,6 +623,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
if reso < NPY_FR_us:
reso = NPY_FR_us
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
@@ -661,6 +663,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
nanos=&nanos,
)
reso = get_supported_reso(out_bestunit)
if reso < NPY_FR_us:
reso = NPY_FR_us
return convert_datetime_to_tsobject(dt, tz, nanos=nanos, reso=reso)


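The ``NPY_FR_us`` floor these Cython hunks apply can be sketched in pure Python (a hypothetical illustration, not the actual implementation):

```python
# Resolutions in increasing fineness, mirroring the NPY_FR_* ordering.
RESO_ORDER = {"s": 0, "ms": 1, "us": 2, "ns": 3}

def clamp_to_us(parsed_reso: str) -> str:
    """Return the unit used after the floor: resolutions coarser than
    microseconds are bumped up to "us"; finer ones pass through."""
    if RESO_ORDER[parsed_reso] < RESO_ORDER["us"]:
        return "us"
    return parsed_reso

print(clamp_to_us("s"))   # "us"
print(clamp_to_us("ms"))  # "us"
print(clamp_to_us("ns"))  # "ns"
```

This is why only nanosecond-precision strings escape the microsecond default.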
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/strptime.pyx
@@ -466,6 +466,8 @@ def array_strptime(
# No error reported by string_to_dts, pick back up
# where we left off
item_reso = get_supported_reso(out_bestunit)
if item_reso < NPY_DATETIMEUNIT.NPY_FR_us:
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
@@ -510,6 +512,8 @@
val, fmt, exact, format_regex, locale_time, &dts, &item_reso
)

if item_reso < NPY_DATETIMEUNIT.NPY_FR_us:
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
2 changes: 1 addition & 1 deletion pandas/conftest.py
@@ -938,7 +938,7 @@ def rand_series_with_duplicate_datetimeindex() -> Series:
(Period("2012-01", freq="M"), "period[M]"),
(Period("2012-02-01", freq="D"), "period[D]"),
(
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"),
DatetimeTZDtype(unit="s", tz="US/Eastern"),
),
(Timedelta(seconds=500), "timedelta64[ns]"),
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
@@ -370,7 +370,7 @@ def unique(values):
array([2, 1])

>>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
array(['2016-01-01T00:00:00'], dtype='datetime64[s]')
array(['2016-01-01T00:00:00.000000'], dtype='datetime64[us]')

>>> pd.unique(
... pd.Series(
12 changes: 6 additions & 6 deletions pandas/core/arrays/datetimelike.py
@@ -1918,11 +1918,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:

>>> rng_tz.floor("2h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)

>>> rng_tz.floor("2h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)
"""

_floor_example = """>>> rng.floor('h')
@@ -1945,11 +1945,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:

>>> rng_tz.floor("2h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)

>>> rng_tz.floor("2h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)
"""

_ceil_example = """>>> rng.ceil('h')
@@ -1972,11 +1972,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:

>>> rng_tz.ceil("h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)

>>> rng_tz.ceil("h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)
"""


16 changes: 8 additions & 8 deletions pandas/core/arrays/datetimes.py
@@ -220,7 +220,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
... )
<DatetimeArray>
['2023-01-01 00:00:00', '2023-01-02 00:00:00']
Length: 2, dtype: datetime64[s]
Length: 2, dtype: datetime64[us]
"""

__module__ = "pandas.arrays"
@@ -612,7 +612,7 @@ def tz(self) -> tzinfo | None:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.tz
datetime.timezone.utc

@@ -1441,7 +1441,7 @@ def time(self) -> npt.NDArray[np.object_]:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.time
0 10:00:00
1 11:00:00
@@ -1484,7 +1484,7 @@ def timetz(self) -> npt.NDArray[np.object_]:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.timetz
0 10:00:00+00:00
1 11:00:00+00:00
@@ -1526,7 +1526,7 @@ def date(self) -> npt.NDArray[np.object_]:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.date
0 2020-01-01
1 2020-02-01
@@ -1875,7 +1875,7 @@ def isocalendar(self) -> DataFrame:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.dayofyear
0 1
1 32
@@ -1911,7 +1911,7 @@ def isocalendar(self) -> DataFrame:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-04-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.quarter
0 1
1 2
@@ -1947,7 +1947,7 @@ def isocalendar(self) -> DataFrame:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.daysinmonth
0 31
1 29
2 changes: 1 addition & 1 deletion pandas/core/base.py
@@ -1380,7 +1380,7 @@ def factorize(
0 2000-03-11
1 2000-03-12
2 2000-03-13
dtype: datetime64[s]
dtype: datetime64[us]

>>> ser.searchsorted('3/14/2000')
np.int64(3)
4 changes: 2 additions & 2 deletions pandas/core/dtypes/missing.py
@@ -150,7 +150,7 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
>>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"])
>>> index
DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
dtype='datetime64[s]', freq=None)
dtype='datetime64[us]', freq=None)
>>> pd.isna(index)
array([False, False, True, False])

@@ -365,7 +365,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
>>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"])
>>> index
DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
dtype='datetime64[s]', freq=None)
dtype='datetime64[us]', freq=None)
>>> pd.notna(index)
array([ True, True, False, True])

2 changes: 1 addition & 1 deletion pandas/core/generic.py
@@ -6310,7 +6310,7 @@ def dtypes(self):
>>> df.dtypes
float float64
int int64
datetime datetime64[s]
datetime datetime64[us]
string str
dtype: object
"""
4 changes: 2 additions & 2 deletions pandas/core/groupby/generic.py
@@ -1475,7 +1475,7 @@ def idxmin(self, skipna: bool = True) -> Series:
>>> ser.groupby(["a", "a", "b", "b"]).idxmin()
a 2023-01-01
b 2023-02-01
dtype: datetime64[s]
dtype: datetime64[us]
"""
return self._idxmax_idxmin("idxmin", skipna=skipna)

@@ -1536,7 +1536,7 @@ def idxmax(self, skipna: bool = True) -> Series:
>>> ser.groupby(["a", "a", "b", "b"]).idxmax()
a 2023-01-15
b 2023-02-15
dtype: datetime64[s]
dtype: datetime64[us]
"""
return self._idxmax_idxmin("idxmax", skipna=skipna)

2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
@@ -252,7 +252,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
>>> idx
DatetimeIndex(['2020-01-01 10:00:00+00:00', '2020-02-01 11:00:00+00:00'],
dtype='datetime64[s, UTC]', freq=None)
dtype='datetime64[us, UTC]', freq=None)
"""

_typ = "datetimeindex"
4 changes: 2 additions & 2 deletions pandas/core/series.py
@@ -2132,14 +2132,14 @@ def unique(self) -> ArrayLike:
>>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique()
<DatetimeArray>
['2016-01-01 00:00:00']
Length: 1, dtype: datetime64[s]
Length: 1, dtype: datetime64[us]

>>> pd.Series(
... [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
... ).unique()
<DatetimeArray>
['2016-01-01 00:00:00-05:00']
Length: 1, dtype: datetime64[s, US/Eastern]
Length: 1, dtype: datetime64[us, US/Eastern]

A Categorical will return categories in the order of
appearance and with the same dtype.
10 changes: 5 additions & 5 deletions pandas/core/tools/datetimes.py
@@ -884,7 +884,7 @@ def to_datetime(
>>> pd.to_datetime(df)
0 2015-02-04
1 2016-03-05
dtype: datetime64[s]
dtype: datetime64[us]

Using a unix epoch time

@@ -927,14 +927,14 @@

>>> pd.to_datetime(["2018-10-26 12:00:00", "2018-10-26 13:00:15"])
DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'],
dtype='datetime64[s]', freq=None)
dtype='datetime64[us]', freq=None)

- Timezone-aware inputs *with constant time offset* are converted to
timezone-aware :class:`DatetimeIndex`:

>>> pd.to_datetime(["2018-10-26 12:00 -0500", "2018-10-26 13:00 -0500"])
DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
dtype='datetime64[s, UTC-05:00]', freq=None)
dtype='datetime64[us, UTC-05:00]', freq=None)

- However, timezone-aware inputs *with mixed time offsets* (for example
issued from a timezone with daylight savings, such as Europe/Paris)
@@ -976,14 +976,14 @@ def to_datetime(

>>> pd.to_datetime(["2018-10-26 12:00", "2018-10-26 13:00"], utc=True)
DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'],
dtype='datetime64[s, UTC]', freq=None)
dtype='datetime64[us, UTC]', freq=None)

- Timezone-aware inputs are *converted* to UTC (the output represents the
exact same datetime, but viewed from the UTC time offset `+00:00`).

>>> pd.to_datetime(["2018-10-26 12:00 -0530", "2018-10-26 12:00 -0500"], utc=True)
DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
dtype='datetime64[s, UTC]', freq=None)
dtype='datetime64[us, UTC]', freq=None)

- Inputs can contain both string or datetime, the above
rules still apply
9 changes: 6 additions & 3 deletions pandas/tests/arrays/test_array.py
@@ -127,7 +127,7 @@ def test_dt64_array(dtype_unit):
(
pd.DatetimeIndex(["2000", "2001"]),
None,
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"),
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[us]"),
),
(
["2000", "2001"],
@@ -323,7 +323,7 @@ def test_array_copy():
([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
# datetime
(
[pd.Timestamp("2000"), pd.Timestamp("2001")],
[pd.Timestamp("2000").as_unit("s"), pd.Timestamp("2001").as_unit("s")],
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"),
),
(
@@ -342,7 +342,10 @@
),
# datetimetz
(
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
[
pd.Timestamp("2000", tz="CET").as_unit("s"),
pd.Timestamp("2001", tz="CET").as_unit("s"),
],
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="s")
),