DEPR: DataFrame.lookup (#35224)

erfannariman · web-flow · commit 1a3a2c18ff1d · 2020-09-16T22:39:40.000-04:00
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
@@ -1480,17 +1480,27 @@ default value.
    s.get('a')  # equivalent to s['a']
    s.get('x', default=-1)
 
-The :meth:`~pandas.DataFrame.lookup` method
--------------------------------------------
+.. _indexing.lookup:
+
+Looking up values by index/column labels
+----------------------------------------
 
 Sometimes you want to extract a set of values given a sequence of row labels
-and column labels, and the ``lookup`` method allows for this and returns a
-NumPy array.  For instance:
+and column labels. This can be achieved by combining ``DataFrame.melt`` with filtering the
+corresponding rows using ``DataFrame.loc``.  For instance:
 
 .. ipython:: python
 
-  dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D'])
-  dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D'])
+    df = pd.DataFrame({'col': ["A", "A", "B", "B"],
+                       'A': [80, 23, np.nan, 22],
+                       'B': [80, 55, 76, 67]})
+    df
+    melt = df.melt('col')
+    melt = melt.loc[melt['col'] == melt['variable'], 'value']
+    melt.reset_index(drop=True)
+
+Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
+which was deprecated in version 1.2.0.
 
 .. _indexing.class:
 
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -210,6 +210,7 @@ Deprecations
 - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
 - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`)
 - Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
+- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version; use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3842,10 +3842,15 @@ def _series(self):
     def lookup(self, row_labels, col_labels) -> np.ndarray:
         """
         Label-based "fancy indexing" function for DataFrame.
-
         Given equal-length arrays of row and column labels, return an
         array of the values corresponding to each (row, col) pair.
 
+        .. deprecated:: 1.2.0
+            DataFrame.lookup is deprecated,
+            use DataFrame.melt and DataFrame.loc instead.
+            For an example see the user guide section
+            :ref:`Looking up values by index/column labels <indexing.lookup>`.
+
         Parameters
         ----------
         row_labels : sequence
@@ -3858,6 +3863,14 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         numpy.ndarray
             The found values.
         """
+        msg = (  # adjacent literals are joined verbatim: keep the trailing spaces
+            "The 'lookup' method is deprecated and will be "
+            "removed in a future version. "
+            "You can use DataFrame.melt and DataFrame.loc "
+            "as a substitute."
+        )
+        warnings.warn(msg, FutureWarning, stacklevel=2)  # attribute to the caller
+
         n = len(row_labels)
         if n != len(col_labels):
             raise ValueError("Row labels must have same size as column labels")
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1340,7 +1340,8 @@ def test_lookup_float(self, float_frame):
         df = float_frame
         rows = list(df.index) * len(df.columns)
         cols = list(df.columns) * len(df.index)
-        result = df.lookup(rows, cols)
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.lookup(rows, cols)
 
         expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)])
         tm.assert_numpy_array_equal(result, expected)
@@ -1349,7 +1350,8 @@ def test_lookup_mixed(self, float_string_frame):
         df = float_string_frame
         rows = list(df.index) * len(df.columns)
         cols = list(df.columns) * len(df.index)
-        result = df.lookup(rows, cols)
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.lookup(rows, cols)
 
         expected = np.array(
             [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_
@@ -1365,7 +1367,8 @@ def test_lookup_bool(self):
                 "mask_c": [False, True, False, True],
             }
         )
-        df["mask"] = df.lookup(df.index, "mask_" + df["label"])
+        with tm.assert_produces_warning(FutureWarning):
+            df["mask"] = df.lookup(df.index, "mask_" + df["label"])
 
         exp_mask = np.array(
             [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
@@ -1376,13 +1379,16 @@ def test_lookup_bool(self):
 
     def test_lookup_raises(self, float_frame):
         with pytest.raises(KeyError, match="'One or more row labels was not found'"):
-            float_frame.lookup(["xyz"], ["A"])
+            with tm.assert_produces_warning(FutureWarning):
+                float_frame.lookup(["xyz"], ["A"])
 
         with pytest.raises(KeyError, match="'One or more column labels was not found'"):
-            float_frame.lookup([float_frame.index[0]], ["xyz"])
+            with tm.assert_produces_warning(FutureWarning):
+                float_frame.lookup([float_frame.index[0]], ["xyz"])
 
         with pytest.raises(ValueError, match="same size"):
-            float_frame.lookup(["a", "b", "c"], ["a"])
+            with tm.assert_produces_warning(FutureWarning):
+                float_frame.lookup(["a", "b", "c"], ["a"])
 
     def test_lookup_requires_unique_axes(self):
         # GH#33041 raise with a helpful error message
@@ -1393,14 +1399,17 @@ def test_lookup_requires_unique_axes(self):
 
         # homogeneous-dtype case
         with pytest.raises(ValueError, match="requires unique index and columns"):
-            df.lookup(rows, cols)
+            with tm.assert_produces_warning(FutureWarning):
+                df.lookup(rows, cols)
         with pytest.raises(ValueError, match="requires unique index and columns"):
-            df.T.lookup(cols, rows)
+            with tm.assert_produces_warning(FutureWarning):
+                df.T.lookup(cols, rows)
 
         # heterogeneous dtype
         df["B"] = 0
         with pytest.raises(ValueError, match="requires unique index and columns"):
-            df.lookup(rows, cols)
+            with tm.assert_produces_warning(FutureWarning):
+                df.lookup(rows, cols)
 
     def test_set_value(self, float_frame):
         for idx in float_frame.index:
@@ -2232,3 +2241,12 @@ def test_object_casting_indexing_wraps_datetimelike():
     assert blk.dtype == "m8[ns]"  # we got the right block
     val = blk.iget((0, 0))
     assert isinstance(val, pd.Timedelta)
+
+
+def test_lookup_deprecated():
+    # GH18682
+    df = pd.DataFrame(
+        {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]}
+    )
+    with tm.assert_produces_warning(FutureWarning):
+        df.lookup(df.index, df["col"])