From c4e1c186d7fa0cb2800d6a3ab430ab916a7a1310 Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Tue, 24 Jun 2025 14:10:47 -0700
Subject: [PATCH 1/9] slack link update

---
 doc/source/development/community.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/development/community.rst b/doc/source/development/community.rst
index 1c698d130ea6c..e139ea0376771 100644
--- a/doc/source/development/community.rst
+++ b/doc/source/development/community.rst
@@ -114,7 +114,7 @@ people who are hesitant to bring up their questions or ideas on a large public
 mailing list or GitHub.
 
 If this sounds like the right place for you, you are welcome to join using
-`this link <https://join.slack.com/t/pandas-dev-community/shared_invite/zt-2blg6u9k3-K6_XvMRDZWeH7Id274UeIg>`_!
+`this link <https://join.slack.com/t/pandas-dev-community/shared_invite/zt-3813u5fme-hmp5izpbeFl9G8~smrkE~A>`_!
 Please remember to follow our `Code of Conduct <https://pandas.pydata.org/community/coc.html>`_,
 and be aware that our admins are monitoring for irrelevant messages and will remove folks who use
 our

From 921695444d0d53cff1517d8edf904474b6f1c246 Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Fri, 8 Aug 2025 20:06:29 -0700
Subject: [PATCH 2/9] Preserve object dtype of categories when constructing Categorical from object-dtype Series/Index

---
 doc/source/user_guide/categorical.rst      | 29 ++++++++++++++++++++++
 pandas/core/arrays/categorical.py          | 13 +++++++---
 pandas/tests/extension/test_categorical.py | 25 +++++++++++++++++++
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 1e7d66dfeb142..51d6fd4a9e3ad 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -1178,3 +1178,32 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
     This also happens in some cases when you supply a NumPy array instead of a ``Categorical``:
     using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using
     a string array (e.g. ``np.array(["a","b","c","a"])``) will not.
+
+.. note::
+
+    When constructing a :class:`pandas.Categorical` from a pandas :class:`Series` or
+     :class:`Index` with ``dtype='object'``, the dtype of the categories will be
+     preserved as ``object``. When constructing from a NumPy array
+     with ``dtype='object'`` or a raw Python sequence, pandas will infer the most
+     specific dtype for the categories (for example, ``str`` if all elements are strings).
+
+.. ipython:: python
+
+    pd.options.future.infer_string = True
+    ser = pd.Series(["foo", "bar", "baz"], dtype="object")
+    idx = pd.Index(["foo", "bar", "baz"], dtype="object")
+    arr = np.array(["foo", "bar", "baz"], dtype="object")
+    pylist = ["foo", "bar", "baz"]
+
+    cat_from_ser = pd.Categorical(ser)
+    cat_from_idx = pd.Categorical(idx)
+    cat_from_arr = pd.Categorical(arr)
+    cat_from_list = pd.Categorical(pylist)
+
+    # Series/Index with object dtype: preserve object dtype
+    assert cat_from_ser.categories.dtype == "object"
+    assert cat_from_idx.categories.dtype == "object"
+
+    # Numpy array or list: infer string dtype
+    assert cat_from_arr.categories.dtype == "str"
+    assert cat_from_list.categories.dtype == "str"
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d57856115d276..fa550a7f46617 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -457,6 +457,11 @@ def __init__(
                 codes = arr.indices.to_numpy()
                 dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)
             else:
+                # Check for pandas Series/ Index with object dtye
+                preserve_object_dtpe = False
+                if isinstance(values, (ABCSeries, ABCIndex)):
+                    if getattr(values.dtype, "name", None) == "object":
+                        preserve_object_dtpe = True
                 if not isinstance(values, ABCIndex):
                     # in particular RangeIndex xref test_index_equal_range_categories
                     values = sanitize_array(values, None)
@@ -465,15 +470,17 @@ def __init__(
                 except TypeError as err:
                     codes, categories = factorize(values, sort=False)
                     if dtype.ordered:
-                        # raise, as we don't have a sortable data structure and so
-                        # the user should give us one by specifying categories
                         raise TypeError(
                             "'values' is not ordered, please "
                             "explicitly specify the categories order "
                             "by passing in a categories argument."
                         ) from err
 
-                # we're inferring from values
+                # If we should prserve object dtype, force categories to object dtype
+                if preserve_object_dtpe:
+                    from pandas import Index
+
+                    categories = Index(categories, dtype=object, copy=False)
                 dtype = CategoricalDtype(categories, dtype.ordered)
 
         elif isinstance(values.dtype, CategoricalDtype):
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 8f8af607585df..5a519a261b029 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -180,6 +180,31 @@ def test_array_repr(self, data, size):
     def test_groupby_extension_agg(self, as_index, data_for_grouping):
         super().test_groupby_extension_agg(as_index, data_for_grouping)
 
+    def test_categorical_preserve_object_dtype_from_pandas(self):
+        import numpy as np
+
+        import pandas as pd
+
+        pd.options.future.infer_string = True
+
+        ser = pd.Series(["foo", "bar", "baz"], dtype="object")
+        idx = pd.Index(["foo", "bar", "baz"], dtype="object")
+        arr = np.array(["foo", "bar", "baz"], dtype="object")
+        pylist = ["foo", "bar", "baz"]
+
+        cat_from_ser = Categorical(ser)
+        cat_from_idx = Categorical(idx)
+        cat_from_arr = Categorical(arr)
+        cat_from_list = Categorical(pylist)
+
+        # Series/Index with object dtype: preserve object dtype
+        assert cat_from_ser.categories.dtype == "object"
+        assert cat_from_idx.categories.dtype == "object"
+
+        # Numpy array or list: infer string dtype
+        assert cat_from_arr.categories.dtype == "str"
+        assert cat_from_list.categories.dtype == "str"
+
 
 class Test2DCompat(base.NDArrayBacked2DTests):
     def test_repr_2d(self, data):

From 8f460acc845be6726582995ca7714c73f764d77f Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Fri, 8 Aug 2025 20:13:29 -0700
Subject: [PATCH 3/9] whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 3191c077d3c36..5501d3fa8b08e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -690,7 +690,7 @@ Categorical
 - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
--
+- Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; the dtype is now preserved as ``object`` for these cases when the values are not all strings, while all-string values, object-dtype NumPy arrays and plain Python sequences continue to infer the most specific dtype (for example, ``str`` if all elements are strings).
 
 Datetimelike
 ^^^^^^^^^^^^

From 87a54feb69f380c911290da89ececd6660ad867b Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Wed, 27 Aug 2025 22:51:54 -0700
Subject: [PATCH 4/9] some comments

---
 pandas/core/arrays/categorical.py             |  2 ++
 .../arrays/categorical/test_constructors.py   | 20 +++++++++++++++
 pandas/tests/extension/test_categorical.py    | 25 -------------------
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index fa550a7f46617..2847da71a17d0 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -468,6 +468,8 @@ def __init__(
                 try:
                     codes, categories = factorize(values, sort=True)
                 except TypeError as err:
+                    # raise, as we don't have a sortable data structure and so
+                    # the user should give us one by specifying categories
                     codes, categories = factorize(values, sort=False)
                     if dtype.ordered:
                         raise TypeError(
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index cf2de894cc0c0..dc68f8abe234a 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -786,3 +786,23 @@ def test_range_values_preserves_rangeindex_categories(self, values, categories):
         result = Categorical(values=values, categories=categories).categories
         expected = RangeIndex(range(5))
         tm.assert_index_equal(result, expected, exact=True)
+
+    def test_categorical_preserve_object_dtype_from_pandas(self):
+        with pd.option_context("future.infer_string", True):
+            ser = Series(["foo", "bar", "baz"], dtype="object")
+            idx = Index(["foo", "bar", "baz"], dtype="object")
+            arr = np.array(["foo", "bar", "baz"], dtype="object")
+            pylist = ["foo", "bar", "baz"]
+
+            cat_from_ser = Categorical(ser)
+            cat_from_idx = Categorical(idx)
+            cat_from_arr = Categorical(arr)
+            cat_from_list = Categorical(pylist)
+
+            # Series/Index with object dtype: preserve object dtype
+            assert cat_from_ser.categories.dtype == "object"
+            assert cat_from_idx.categories.dtype == "object"
+
+            # Numpy array or list: infer string dtype
+            assert cat_from_arr.categories.dtype == "str"
+            assert cat_from_list.categories.dtype == "str"
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 5a519a261b029..8f8af607585df 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -180,31 +180,6 @@ def test_array_repr(self, data, size):
     def test_groupby_extension_agg(self, as_index, data_for_grouping):
         super().test_groupby_extension_agg(as_index, data_for_grouping)
 
-    def test_categorical_preserve_object_dtype_from_pandas(self):
-        import numpy as np
-
-        import pandas as pd
-
-        pd.options.future.infer_string = True
-
-        ser = pd.Series(["foo", "bar", "baz"], dtype="object")
-        idx = pd.Index(["foo", "bar", "baz"], dtype="object")
-        arr = np.array(["foo", "bar", "baz"], dtype="object")
-        pylist = ["foo", "bar", "baz"]
-
-        cat_from_ser = Categorical(ser)
-        cat_from_idx = Categorical(idx)
-        cat_from_arr = Categorical(arr)
-        cat_from_list = Categorical(pylist)
-
-        # Series/Index with object dtype: preserve object dtype
-        assert cat_from_ser.categories.dtype == "object"
-        assert cat_from_idx.categories.dtype == "object"
-
-        # Numpy array or list: infer string dtype
-        assert cat_from_arr.categories.dtype == "str"
-        assert cat_from_list.categories.dtype == "str"
-
 
 class Test2DCompat(base.NDArrayBacked2DTests):
     def test_repr_2d(self, data):

From cddc5746e19b3e864fe36d2ca470b4f660dd508d Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Wed, 27 Aug 2025 23:08:35 -0700
Subject: [PATCH 5/9] comment restore

---
 pandas/core/arrays/categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 2847da71a17d0..bee3caa0e4a84 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -468,10 +468,10 @@ def __init__(
                 try:
                     codes, categories = factorize(values, sort=True)
                 except TypeError as err:
-                    # raise, as we don't have a sortable data structure and so
-                    # the user should give us one by specifying categories
                     codes, categories = factorize(values, sort=False)
                     if dtype.ordered:
+                        # raise, as we don't have a sortable data structure and so
+                        # the user should give us one by specifying categories
                         raise TypeError(
                             "'values' is not ordered, please "
                             "explicitly specify the categories order "

From e83e4f9ccbc0dfc253ebfb264c2eb52a5d3acc4c Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Thu, 28 Aug 2025 00:27:29 -0700
Subject: [PATCH 6/9] assertionerror fix

---
 pandas/core/arrays/categorical.py             |  6 ++++--
 .../arrays/categorical/test_constructors.py   | 19 ++++++++++++++-----
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index bee3caa0e4a84..cc8f6cfa8ef41 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -480,9 +480,11 @@ def __init__(
 
                 # If we should prserve object dtype, force categories to object dtype
                 if preserve_object_dtpe:
-                    from pandas import Index
+                    # Only preserve object dtype if not all elements are strings
+                    if not all(isinstance(x, str) for x in categories):
+                        from pandas import Index
 
-                    categories = Index(categories, dtype=object, copy=False)
+                        categories = Index(categories, dtype=object, copy=False)
                 dtype = CategoricalDtype(categories, dtype.ordered)
 
         elif isinstance(values.dtype, CategoricalDtype):
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index dc68f8abe234a..d16daf76304a0 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -799,10 +799,19 @@ def test_categorical_preserve_object_dtype_from_pandas(self):
             cat_from_arr = Categorical(arr)
             cat_from_list = Categorical(pylist)
 
-            # Series/Index with object dtype: preserve object dtype
-            assert cat_from_ser.categories.dtype == "object"
-            assert cat_from_idx.categories.dtype == "object"
+            # Series/Index with object dtype: infer string
+            # dtype if all elements are strings
+            assert cat_from_ser.categories.inferred_type == "string"
+            assert cat_from_idx.categories.inferred_type == "string"
 
             # Numpy array or list: infer string dtype
-            assert cat_from_arr.categories.dtype == "str"
-            assert cat_from_list.categories.dtype == "str"
+            assert cat_from_arr.categories.inferred_type == "string"
+            assert cat_from_list.categories.inferred_type == "string"
+
+            # Mixed types: preserve object dtype
+            ser_mixed = Series(["foo", 1, None], dtype="object")
+            idx_mixed = Index(["foo", 1, None], dtype="object")
+            cat_mixed_ser = Categorical(ser_mixed)
+            cat_mixed_idx = Categorical(idx_mixed)
+            assert cat_mixed_ser.categories.dtype == "object"
+            assert cat_mixed_idx.categories.dtype == "object"

From 5ed039a779bd39ff2c75434c27652727a8d45fbf Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Thu, 28 Aug 2025 01:01:23 -0700
Subject: [PATCH 7/9] rst changes

---
 doc/source/user_guide/categorical.rst | 41 ++++++++++++++++-----------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 51d6fd4a9e3ad..b6d70e87b95b2 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -1185,25 +1185,34 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
      :class:`Index` with ``dtype='object'``, the dtype of the categories will be
      preserved as ``object``. When constructing from a NumPy array
      with ``dtype='object'`` or a raw Python sequence, pandas will infer the most
-     specific dtype for the categories (for example, ``str`` if all elements are strings).
+     specific dtype for the categories (for example, ``string`` if all elements are strings).
 
 .. ipython:: python
 
-    pd.options.future.infer_string = True
-    ser = pd.Series(["foo", "bar", "baz"], dtype="object")
-    idx = pd.Index(["foo", "bar", "baz"], dtype="object")
-    arr = np.array(["foo", "bar", "baz"], dtype="object")
-    pylist = ["foo", "bar", "baz"]
+    with pd.option_context("future.infer_string", True):
+        ser = Series(["foo", "bar", "baz"], dtype="object")
+        idx = Index(["foo", "bar", "baz"], dtype="object")
+        arr = np.array(["foo", "bar", "baz"], dtype="object")
+        pylist = ["foo", "bar", "baz"]
 
-    cat_from_ser = pd.Categorical(ser)
-    cat_from_idx = pd.Categorical(idx)
-    cat_from_arr = pd.Categorical(arr)
-    cat_from_list = pd.Categorical(pylist)
+        cat_from_ser = Categorical(ser)
+        cat_from_idx = Categorical(idx)
+        cat_from_arr = Categorical(arr)
+        cat_from_list = Categorical(pylist)
 
-    # Series/Index with object dtype: preserve object dtype
-    assert cat_from_ser.categories.dtype == "object"
-    assert cat_from_idx.categories.dtype == "object"
+        # Series/Index with object dtype: infer string
+        # dtype if all elements are strings
+        assert cat_from_ser.categories.inferred_type == "string"
+        assert cat_from_idx.categories.inferred_type == "string"
 
-    # Numpy array or list: infer string dtype
-    assert cat_from_arr.categories.dtype == "str"
-    assert cat_from_list.categories.dtype == "str"
+        # Numpy array or list: infer string dtype
+        assert cat_from_arr.categories.inferred_type == "string"
+        assert cat_from_list.categories.inferred_type == "string"
+
+        # Mixed types: preserve object dtype
+        ser_mixed = Series(["foo", 1, None], dtype="object")
+        idx_mixed = Index(["foo", 1, None], dtype="object")
+        cat_mixed_ser = Categorical(ser_mixed)
+        cat_mixed_idx = Categorical(idx_mixed)
+        assert cat_mixed_ser.categories.dtype == "object"
+        assert cat_mixed_idx.categories.dtype == "object"

From 9b4b2d91bc87c8be7695bf3be12cb626bd4db886 Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Thu, 28 Aug 2025 01:16:26 -0700
Subject: [PATCH 8/9] rst import error

---
 doc/source/user_guide/categorical.rst | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index b6d70e87b95b2..73b252929ea72 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -1190,18 +1190,17 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
 .. ipython:: python
 
     with pd.option_context("future.infer_string", True):
-        ser = Series(["foo", "bar", "baz"], dtype="object")
-        idx = Index(["foo", "bar", "baz"], dtype="object")
+        ser = pd.Series(["foo", "bar", "baz"], dtype="object")
+        idx = pd.Index(["foo", "bar", "baz"], dtype="object")
         arr = np.array(["foo", "bar", "baz"], dtype="object")
         pylist = ["foo", "bar", "baz"]
 
-        cat_from_ser = Categorical(ser)
-        cat_from_idx = Categorical(idx)
-        cat_from_arr = Categorical(arr)
-        cat_from_list = Categorical(pylist)
+        cat_from_ser = pd.Categorical(ser)
+        cat_from_idx = pd.Categorical(idx)
+        cat_from_arr = pd.Categorical(arr)
+        cat_from_list = pd.Categorical(pylist)
 
-        # Series/Index with object dtype: infer string
-        # dtype if all elements are strings
+        # Series/Index with object dtype: infer string dtype
         assert cat_from_ser.categories.inferred_type == "string"
         assert cat_from_idx.categories.inferred_type == "string"
 
@@ -1210,9 +1209,9 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
         assert cat_from_list.categories.inferred_type == "string"
 
         # Mixed types: preserve object dtype
-        ser_mixed = Series(["foo", 1, None], dtype="object")
-        idx_mixed = Index(["foo", 1, None], dtype="object")
-        cat_mixed_ser = Categorical(ser_mixed)
-        cat_mixed_idx = Categorical(idx_mixed)
+        ser_mixed = pd.Series(["foo", 1, None], dtype="object")
+        idx_mixed = pd.Index(["foo", 1, None], dtype="object")
+        cat_mixed_ser = pd.Categorical(ser_mixed)
+        cat_mixed_idx = pd.Categorical(idx_mixed)
         assert cat_mixed_ser.categories.dtype == "object"
         assert cat_mixed_idx.categories.dtype == "object"

From 1b81162ee3140f80f8686ceaf30358d5ee0d343b Mon Sep 17 00:00:00 2001
From: Niruta Talwekar <niruta.talwekar25@gmail.com>
Date: Sun, 5 Oct 2025 00:34:38 -0700
Subject: [PATCH 9/9] change condition

---
 pandas/core/arrays/categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index de0c7fba18b46..224659a0aa699 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -457,7 +457,7 @@ def __init__(
                 # Check for pandas Series/ Index with object dtye
                 preserve_object_dtpe = False
                 if isinstance(values, (ABCSeries, ABCIndex)):
-                    if getattr(values.dtype, "name", None) == "object":
+                    if values.dtype == "object":
                         preserve_object_dtpe = True
                 if not isinstance(values, ABCIndex):
                     # in particular RangeIndex xref test_index_equal_range_categories
@@ -475,7 +475,7 @@ def __init__(
                             "by passing in a categories argument."
                         ) from err
 
-                # If we should prserve object dtype, force categories to object dtype
+                # If we should preserve object dtype, force categories to object dtype
                 if preserve_object_dtpe:
                     # Only preserve object dtype if not all elements are strings
                     if not all(isinstance(x, str) for x in categories):