Skip to content

Commit 1a3a2c1

Browse files
authored
DEPR: DataFrame.lookup (#35224)
1 parent 70d618c commit 1a3a2c1

File tree

4 files changed

+58
-16
lines changed

4 files changed

+58
-16
lines changed

Diff for: doc/source/user_guide/indexing.rst

+16-6
Original file line numberDiff line numberDiff line change
@@ -1480,17 +1480,27 @@ default value.
14801480
s.get('a') # equivalent to s['a']
14811481
s.get('x', default=-1)
14821482
1483-
The :meth:`~pandas.DataFrame.lookup` method
1484-
-------------------------------------------
1483+
.. _indexing.lookup:
1484+
1485+
Looking up values by index/column labels
1486+
----------------------------------------
14851487

14861488
Sometimes you want to extract a set of values given a sequence of row labels
1487-
and column labels, and the ``lookup`` method allows for this and returns a
1488-
NumPy array. For instance:
1489+
and column labels. This can be achieved by combining ``DataFrame.melt`` with filtering the corresponding
1490+
rows with ``DataFrame.loc``. For instance:
14891491

14901492
.. ipython:: python
14911493
1492-
dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D'])
1493-
dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D'])
1494+
df = pd.DataFrame({'col': ["A", "A", "B", "B"],
1495+
'A': [80, 23, np.nan, 22],
1496+
'B': [80, 55, 76, 67]})
1497+
df
1498+
melt = df.melt('col')
1499+
melt = melt.loc[melt['col'] == melt['variable'], 'value']
1500+
melt.reset_index(drop=True)
1501+
1502+
Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
1503+
which was deprecated in version 1.2.0.
14941504

14951505
.. _indexing.class:
14961506

Diff for: doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ Deprecations
210210
- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
211211
- Deprecated parameter ``dtype`` in :meth:`Index.copy` on all index classes. Use the :meth:`Index.astype` method instead for changing dtype (:issue:`35853`)
212212
- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
213+
- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version; use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`)
213214

214215
.. ---------------------------------------------------------------------------
215216

Diff for: pandas/core/frame.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -3842,10 +3842,15 @@ def _series(self):
38423842
def lookup(self, row_labels, col_labels) -> np.ndarray:
38433843
"""
38443844
Label-based "fancy indexing" function for DataFrame.
3845-
38463845
Given equal-length arrays of row and column labels, return an
38473846
array of the values corresponding to each (row, col) pair.
38483847
3848+
.. deprecated:: 1.2.0
3849+
DataFrame.lookup is deprecated,
3850+
use DataFrame.melt and DataFrame.loc instead.
3851+
For an example see :ref:`indexing.lookup`
3852+
in the user guide.
3853+
38493854
Parameters
38503855
----------
38513856
row_labels : sequence
@@ -3858,6 +3863,14 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
38583863
numpy.ndarray
38593864
The found values.
38603865
"""
3866+
msg = (
3867+
"The 'lookup' method is deprecated and will be "
3868+
"removed in a future version. "
3869+
"You can use DataFrame.melt and DataFrame.loc "
3870+
"as a substitute."
3871+
)
3872+
warnings.warn(msg, FutureWarning, stacklevel=2)
3873+
38613874
n = len(row_labels)
38623875
if n != len(col_labels):
38633876
raise ValueError("Row labels must have same size as column labels")

Diff for: pandas/tests/frame/indexing/test_indexing.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -1340,7 +1340,8 @@ def test_lookup_float(self, float_frame):
13401340
df = float_frame
13411341
rows = list(df.index) * len(df.columns)
13421342
cols = list(df.columns) * len(df.index)
1343-
result = df.lookup(rows, cols)
1343+
with tm.assert_produces_warning(FutureWarning):
1344+
result = df.lookup(rows, cols)
13441345

13451346
expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)])
13461347
tm.assert_numpy_array_equal(result, expected)
@@ -1349,7 +1350,8 @@ def test_lookup_mixed(self, float_string_frame):
13491350
df = float_string_frame
13501351
rows = list(df.index) * len(df.columns)
13511352
cols = list(df.columns) * len(df.index)
1352-
result = df.lookup(rows, cols)
1353+
with tm.assert_produces_warning(FutureWarning):
1354+
result = df.lookup(rows, cols)
13531355

13541356
expected = np.array(
13551357
[df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_
@@ -1365,7 +1367,8 @@ def test_lookup_bool(self):
13651367
"mask_c": [False, True, False, True],
13661368
}
13671369
)
1368-
df["mask"] = df.lookup(df.index, "mask_" + df["label"])
1370+
with tm.assert_produces_warning(FutureWarning):
1371+
df["mask"] = df.lookup(df.index, "mask_" + df["label"])
13691372

13701373
exp_mask = np.array(
13711374
[df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
@@ -1376,13 +1379,16 @@ def test_lookup_bool(self):
13761379

13771380
def test_lookup_raises(self, float_frame):
13781381
with pytest.raises(KeyError, match="'One or more row labels was not found'"):
1379-
float_frame.lookup(["xyz"], ["A"])
1382+
with tm.assert_produces_warning(FutureWarning):
1383+
float_frame.lookup(["xyz"], ["A"])
13801384

13811385
with pytest.raises(KeyError, match="'One or more column labels was not found'"):
1382-
float_frame.lookup([float_frame.index[0]], ["xyz"])
1386+
with tm.assert_produces_warning(FutureWarning):
1387+
float_frame.lookup([float_frame.index[0]], ["xyz"])
13831388

13841389
with pytest.raises(ValueError, match="same size"):
1385-
float_frame.lookup(["a", "b", "c"], ["a"])
1390+
with tm.assert_produces_warning(FutureWarning):
1391+
float_frame.lookup(["a", "b", "c"], ["a"])
13861392

13871393
def test_lookup_requires_unique_axes(self):
13881394
# GH#33041 raise with a helpful error message
@@ -1393,14 +1399,17 @@ def test_lookup_requires_unique_axes(self):
13931399

13941400
# homogeneous-dtype case
13951401
with pytest.raises(ValueError, match="requires unique index and columns"):
1396-
df.lookup(rows, cols)
1402+
with tm.assert_produces_warning(FutureWarning):
1403+
df.lookup(rows, cols)
13971404
with pytest.raises(ValueError, match="requires unique index and columns"):
1398-
df.T.lookup(cols, rows)
1405+
with tm.assert_produces_warning(FutureWarning):
1406+
df.T.lookup(cols, rows)
13991407

14001408
# heterogeneous dtype
14011409
df["B"] = 0
14021410
with pytest.raises(ValueError, match="requires unique index and columns"):
1403-
df.lookup(rows, cols)
1411+
with tm.assert_produces_warning(FutureWarning):
1412+
df.lookup(rows, cols)
14041413

14051414
def test_set_value(self, float_frame):
14061415
for idx in float_frame.index:
@@ -2232,3 +2241,12 @@ def test_object_casting_indexing_wraps_datetimelike():
22322241
assert blk.dtype == "m8[ns]" # we got the right block
22332242
val = blk.iget((0, 0))
22342243
assert isinstance(val, pd.Timedelta)
2244+
2245+
2246+
def test_lookup_deprecated():
2247+
# GH18682
2248+
df = pd.DataFrame(
2249+
{"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]}
2250+
)
2251+
with tm.assert_produces_warning(FutureWarning):
2252+
df.lookup(df.index, df["col"])

0 commit comments

Comments
 (0)