Review notes (text ahead of the first "diff --git" header is not part of any
hunk):

* The patch had been flattened onto two lines. Line structure is restored and
  Python indentation is re-derived from the code structure -- verify it
  against the target files before applying.
* Columns are now collected by position (``arrays[i]``) and labelled
  afterwards. The earlier revision stored them in a dict keyed by label, so
  with duplicate labels only the last column of each label survived and was
  then reused for every column carrying that label.
* The test gives the two "A" columns different data; with identical data the
  overwrite described above goes unnoticed.
* Dropped as unrelated churn: the re-wrapping of the ``get_handle`` import and
  the ``self_columns`` alias (``self`` is never rebound, so ``self.columns``
  is the same object).
* Context lines whose exact whitespace could not be recovered were trimmed
  from the hunks. The ``index`` lines of the two rewritten files were removed
  because their post-image blob ids no longer describe this content.
* NOTE(review): frames whose duplicate-labelled columns would also need
  reordering are presumably still rejected by the final reindexing
  constructor call -- confirm whether that case must be supported.

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 38755aef32b85..a956e8fe6b30f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1267,6 +1267,7 @@ Other
 - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
 - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
 - Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`)
+- Fixed bug in :meth:`DataFrame.combine` with non-unique columns (:issue:`51340`)
 - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`55127`)
 - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`np.nan` values in a :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`51237`)
 - Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -9107,10 +9107,14 @@ def combine(
         # preserve column order
         new_columns = self.columns.union(other_columns, sort=False)
         do_fill = fill_value is not None
-        result = {}
-        for col in new_columns:
-            series = this[col]
-            other_series = other[col]
+        # Walk the aligned frames by position and collect by position: with
+        # duplicate labels ``this[col]`` is a DataFrame, and a label-keyed dict
+        # would keep only the last column of each label.
+        arrays = {}
+        for i in range(this.shape[1]):
+            col = this.columns[i]
+            series = this.iloc[:, i]
+            other_series = other.iloc[:, i]
 
             this_dtype = series.dtype
             other_dtype = other_series.dtype
@@ -9121,7 +9125,7 @@ def combine(
             # don't overwrite columns unnecessarily
             # DO propagate if this column is not in the intersection
             if not overwrite and other_mask.all():
-                result[col] = this[col].copy()
+                arrays[i] = this.iloc[:, i].copy()
                 continue
 
             if do_fill:
@@ -9157,5 +9161,11 @@ def combine(
 
-            result[col] = arr
+            arrays[i] = arr
 
+        # Label the columns only after assembling them by position, so that
+        # duplicate labels keep their own data; the constructor call below
+        # then merely restores the ``new_columns`` order.
+        result = self._constructor(arrays, index=new_index)
+        result.columns = this.columns
+
         # convert_objects just in case
         frame_result = self._constructor(result, index=new_index, columns=new_columns)
diff --git a/pandas/tests/frame/methods/test_combine.py b/pandas/tests/frame/methods/test_combine.py
--- a/pandas/tests/frame/methods/test_combine.py
+++ b/pandas/tests/frame/methods/test_combine.py
@@ -46,2 +46,18 @@ def test_combine_generic(self, float_frame):
         tm.assert_frame_equal(chunk, exp)
         tm.assert_frame_equal(chunk2, exp)
+
+    def test_combine_non_unique_columns(self):
+        # GH#51340
+        # Give the two "A" columns different data so that a result built
+        # from only one of them cannot pass.
+        df = pd.DataFrame({"A": range(5), "B": range(5, 10)})
+        df.columns = ["A", "A"]
+
+        other = df.copy()
+        df.iloc[1, :] = 11
+
+        def combiner(a, b):
+            return b
+
+        result = df.combine(other, combiner)
+        tm.assert_frame_equal(result, other)