Skip to content

Commit 7413f10

Browse files
dsaxton authored and rhshadrach committed
BUG: Always cast to Categorical in lexsort_indexer (pandas-dev#36385)
1 parent de09b38 commit 7413f10

File tree

3 files changed

+22
-8
lines changed

3 files changed

+22
-8
lines changed

doc/source/whatsnew/v1.1.3.rst

+1
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ Bug fixes
4545
- Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`)
4646
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`)
4747
- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`)
48+
- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`)
4849
- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`)
4950

5051
.. ---------------------------------------------------------------------------

pandas/core/sorting.py

+1-8
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
from pandas.core.dtypes.common import (
2121
ensure_int64,
2222
ensure_platform_int,
23-
is_categorical_dtype,
2423
is_extension_array_dtype,
2524
)
2625
from pandas.core.dtypes.generic import ABCMultiIndex
@@ -294,13 +293,7 @@ def lexsort_indexer(
294293
keys = [ensure_key_mapped(k, key) for k in keys]
295294

296295
for k, order in zip(keys, orders):
297-
# we are already a Categorical
298-
if is_categorical_dtype(k):
299-
cat = k
300-
301-
# create the Categorical
302-
else:
303-
cat = Categorical(k, ordered=True)
296+
cat = Categorical(k, ordered=True)
304297

305298
if na_position not in ["last", "first"]:
306299
raise ValueError(f"invalid na_position: {na_position}")

pandas/tests/frame/methods/test_sort_values.py

+20
Original file line number | Diff line number | Diff line change
@@ -691,3 +691,23 @@ def test_sort_values_key_dict_axis(self):
691691
result = df.sort_values(1, key=lambda col: -col, axis=1)
692692
expected = df.loc[:, ::-1]
693693
tm.assert_frame_equal(result, expected)
694+
695+
@pytest.mark.parametrize("ordered", [True, False])
696+
def test_sort_values_key_casts_to_categorical(self, ordered):
697+
# https://github.com/pandas-dev/pandas/issues/36383
698+
categories = ["c", "b", "a"]
699+
df = pd.DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]})
700+
701+
def sorter(key):
702+
if key.name == "y":
703+
return pd.Series(
704+
pd.Categorical(key, categories=categories, ordered=ordered)
705+
)
706+
return key
707+
708+
result = df.sort_values(by=["x", "y"], key=sorter)
709+
expected = pd.DataFrame(
710+
{"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0])
711+
)
712+
713+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)