Skip to content

Fix alignment in Series subtraction with MultiIndex, Index and NaN values (#60908) #61381

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 27 additions & 23 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5890,32 +5890,36 @@ def _arith_method(self, other, op):
self, other = self._align_for_op(other)
return base.IndexOpsMixin._arith_method(self, other, op)

def _align_for_op(self, right, align_asobject: bool = False, fill_value=np.nan):
    """
    Align lhs and rhs Series for an arithmetic or logical operation.

    Parameters
    ----------
    right : object
        The other operand. Anything that is not a Series is returned
        untouched.
    align_asobject : bool, default False
        If True and *both* operands have object or bool dtype, cast both to
        object before aligning so that the original values survive the
        introduction of missing entries (used by the bool ops).
    fill_value : scalar, default np.nan
        Forwarded to ``Series.align`` / ``Series.reindex`` for positions
        that exist in only one operand.

    Returns
    -------
    tuple
        ``(left, right)``. When ``right`` is a Series the two results share
        an equal index.

    Notes
    -----
    When exactly one operand has a MultiIndex, the other operand's index is
    unique and its name matches exactly one level name, and that level
    holds missing entries (code ``-1``), the flat operand is broadcast over
    the MultiIndex rows directly instead of going through ``Series.align``
    (GH#60908). Every other case is delegated to ``Series.align``.

    NOTE(review): the misalignment presumably originates in the level join
    that ``Series.align`` performs for such indexes -- confirm, and prefer
    fixing it there if so; this method would then not need the special case.
    """
    # Local import so the method does not depend on a module-level name.
    from pandas.core.indexes.multi import MultiIndex

    # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray
    # are not coerced here
    # because Series has inconsistencies described in GH#13637
    left = self

    if not isinstance(right, Series):
        return left, right

    # avoid repeated alignment
    if left.index.equals(right.index):
        return left, right

    if align_asobject:
        bool_like = (object, np.bool_)
        if left.dtype in bool_like and right.dtype in bool_like:
            # to keep original value's dtype for bool ops
            left = left.astype(object)
            right = right.astype(object)
        # GH#52538 no longer cast in any other case

    left_is_multi = isinstance(left.index, MultiIndex)
    if left_is_multi != isinstance(right.index, MultiIndex):
        # Exactly one side is a MultiIndex: candidate for a level broadcast.
        multi, flat = (left, right) if left_is_multi else (right, left)
        level_names = list(multi.index.names)
        key = flat.index.name

        # Only take over when the target level is unambiguous and the flat
        # index can be re-indexed (reindex needs unique source labels).
        if (
            key is not None
            and level_names.count(key) == 1
            and flat.index.is_unique
        ):
            level = level_names.index(key)
            if (multi.index.codes[level] == -1).any():
                # GH#60908: look each MultiIndex row up in the flat operand
                # by its value on the shared level; dtypes are left alone.
                labels = multi.index.get_level_values(level)
                flat = flat.reindex(labels, fill_value=fill_value)
                # Hand back operands that carry the very same index.
                flat = flat.set_axis(multi.index)
                return (multi, flat) if left_is_multi else (flat, multi)

    return left.align(right, join="outer", fill_value=fill_value)

def _binop(self, other: Series, func, level=None, fill_value=None) -> Series:
"""
Perform generic binary operation with optional fill value.
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/series/test_subtraction_nanindex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
import pandas as pd
import numpy as np
import pandas.testing as tm

def test_series_subtraction_with_nan_and_levels():
ix1 = pd.MultiIndex.from_arrays(
[
[np.nan, 81, 81, 82, 82],
[np.nan] * 5,
pd.to_datetime([np.nan, '2018-06-01', '2018-07-01', '2018-07-01', '2018-08-01'])
],
names=['foo', 'bar', 'date']
)

s1 = pd.Series(
[np.nan, 25.058969, 22.519751, 20.847981, 21.625236],
index=ix1
)

ix2 = pd.Index([81, 82, 83, 84, 85, 86, 87], name='foo')
s2 = pd.Series(
[28.2800, 25.2500, 22.2200, 16.7660, 14.0087, 14.9480, 29.2900],
index=ix2
)

expected = pd.Series(
[np.nan, -3.221031, -5.760249, -4.402019, -3.624764],
index=ix1,
dtype='float64'
)

result = s1 - s2

result = result.astype('float64')

tm.assert_series_equal(result, expected)

Loading