Unverified commit 7c943a65, authored by pangyoki, committed by GitHub

[Cherry-pick PR 29913], add View (reuse allocation) strategy on squeeze, unsqueeze, reshape, flatten op (#29913) (#30258)

* add view strategy on squeeze, unsqueeze, reshape, flatten

* add squeeze unittest

* add unittests

* use View strategy as name rather than Reuse Allocation

* fix view api doc

* fix format

* use core.ops when input of reshape2 is Tensor

* fix test_cross_entropy_loss error because of reshape2

* delete selected_rows

* change op_function

* little change

* solve HandleViewBetweenInputAndOutput
Parent afbc6367
@@ -147,6 +147,32 @@ ConstructDuplicableOutput(const size_t num) {
  }
  return res;
}

static inline void HandleViewBetweenInputAndOutput(
    const std::shared_ptr<imperative::VarBase>& input_var,
    const std::shared_ptr<imperative::VarBase>& view_output_var) {
  PADDLE_ENFORCE_EQ(
      input_var->Var().IsInitialized(), true,
      platform::errors::InvalidArgument("Tensor %s has not been initialized!",
                                        input_var->Name()));
  if (input_var->Var().IsType<framework::LoDTensor>()) {
    const auto& input_tensor = input_var->Var().Get<framework::LoDTensor>();
    PADDLE_ENFORCE_EQ(
        input_tensor.IsInitialized(), true,
        platform::errors::InvalidArgument(
            "LoDTensor %s has not been initialized!", input_var->Name()));
    auto* view_output_tensor =
        view_output_var->MutableVar()->GetMutable<framework::LoDTensor>();
    view_output_tensor->ShareDataWith(input_tensor);
    view_output_tensor->ShareInplaceVersionCounterWith(input_tensor);
    VLOG(3) << "Perform View between Output Var(" << view_output_var->Name()
            << ") and Input Var(" << input_var->Name()
            << "), share allocation and inplace version.";
  }
}
} // namespace pybind
} // namespace paddle
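In dygraph mode, the effect of HandleViewBetweenInputAndOutput above is directly observable from Python: the view op's output reuses the input's allocation, and because the inplace version counter is shared, an in-place write to either tensor bumps both versions. A minimal sketch of that behavior (assuming Paddle 2.0+ with dygraph mode enabled by default; distilled from the unit tests added later in this commit):

import paddle

x = paddle.rand([2, 3, 1])
y = paddle.squeeze(x)  # lowers to squeeze2, a registered view op

x[0, 0, 0] = 42.  # in-place write to the base tensor

print(y[0, 0])            # reflects the write through the shared allocation
print(x.inplace_version)  # 1
print(y.inplace_version)  # 1, via the shared version counter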
@@ -133,6 +133,19 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
    {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}},
};
// NOTE(pangyoki): Tensor View Strategy.
// In this case, a new output varbase will be created, and this varbase will
// reuse the input varbase's allocation.
// The map below maps each view op name to an (input name, output name) pair
// that records which output varbase shares the input varbase's allocation.
std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
    {"unsqueeze2", {"X", "Out"}},
    {"reshape2", {"X", "Out"}},
    {"flatten_contiguous_range", {"X", "Out"}},
};
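These four ops back the user-facing paddle.squeeze, paddle.unsqueeze, paddle.reshape, and paddle.flatten APIs. A short sketch (assuming dygraph mode) showing that each returns a view of its input, and that Tensor.clone is the documented opt-out (see the docstring changes below):

import paddle

x = paddle.rand([2, 3, 1])
views = [
    paddle.squeeze(x),        # squeeze2
    paddle.unsqueeze(x, -1),  # unsqueeze2
    paddle.reshape(x, [6]),   # reshape2
    paddle.flatten(x),        # flatten_contiguous_range
]

x[0, 0, 0] = -1.  # visible through every view above
for v in views:
    print(v.numpy().flatten()[0])  # -1.0 each time

copy = paddle.flatten(x).clone()  # an independent copy, not a view
x[0, 0, 0] = 7.
print(copy.numpy()[0])  # still -1.0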
// clang-format off
const char* OUT_INITIALIZER_TEMPLATE =
    R"({"%s", {std::shared_ptr<imperative::VarBase>(new imperative::VarBase(tracer->GenerateUniqueName()))}})";
@@ -188,6 +201,11 @@ const char* RETURN_TEMPLATE = R"(outs["%s"][0])";
const char* FUNCTION_ARGS = R"(%s, const py::args& args)";
const char* FUNCTION_ARGS_NO_INPUT = R"(const py::args& args)";

const char* HandleViewBetweenInputAndOutput = R"(
  if (ins.count("%s") && outs.count("%s")) {
    HandleViewBetweenInputAndOutput(ins["%s"][0], outs["%s"][0]);
  })";
const char* OP_FUNCTION_TEMPLATE =
R"(
%s %s(%s)
@@ -224,6 +242,10 @@ static inline bool FindPassingOutsMap(const std::string& op_type,
  return op_passing_outs_map[op_type].count(out_name);
}

static inline bool FindViewOpMap(const std::string& op_type) {
  return view_op_map.count(op_type);
}

static inline std::string TempName(const std::string& name) {
  return name + '_';
}
@@ -254,6 +276,7 @@ GenerateOpFunctions(const std::string& module_name) {
    int arg_idx = 0;
    int input_args_num = 0;
    std::string ins_cast_str = "";
    std::string view_strategy_str = "";
    for (auto& input : op_proto->inputs()) {
      auto& in_name = input.name();
      // skip those dispensable inputs, like ResidualData in conv2d
@@ -369,6 +392,13 @@ GenerateOpFunctions(const std::string& module_name) {
      return_str.pop_back();
    }
    outs_initializer += "}";

    if (FindViewOpMap(op_type)) {
      std::string view_input_name = view_op_map[op_type].first;
      std::string view_output_name = view_op_map[op_type].second;
      view_strategy_str += paddle::string::Sprintf(
          HandleViewBetweenInputAndOutput, view_input_name, view_output_name,
          view_input_name, view_output_name);
    }

    if (outs_num == 0) {
      return_type = "void";
    }
@@ -388,7 +418,8 @@ GenerateOpFunctions(const std::string& module_name) {
    auto op_function_str = paddle::string::Sprintf(
        OP_FUNCTION_TEMPLATE, return_type, func_name, function_args,
        ins_cast_str, op_type, input_args_num, outs_initializer,
        ins_initializer, ins_initializer_with_null +
                             outs_initializer_with_null + view_strategy_str,
        op_type, return_str);

    // generate pybind item
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np

from op_test import OpTest
import paddle


# NOTE(pangyoki): Tensor View Strategy.
# Refer to `op_function_generator.py`.
# For view ops, a new output varbase will be created, and this varbase will
# reuse the input varbase's allocation.
# View APIs include: `squeeze`, `unsqueeze`, `reshape`, `flatten`, `detach`
class TestDygraphViewReuseAllocation(unittest.TestCase):
    def setUp(self):
        self.init_shape()

    def init_shape(self):
        self.input_shape = [2, 3, 1]
        self.output_shape = [2, 3]

    def view_api_processing(self, var):
        return paddle.squeeze(var)

    def test_view_api(self):
        var = paddle.rand(self.input_shape)
        view_var = self.view_api_processing(var)
        view_var[0] = 2.
        self.assertEqual(var.shape, self.input_shape)
        self.assertEqual(view_var.shape, self.output_shape)

        var_numpy = var.numpy().reshape(self.output_shape)
        view_var_numpy = view_var.numpy()
        self.assertTrue(np.array_equal(var_numpy, view_var_numpy))

    def test_forward_version(self):
        var = paddle.rand(self.input_shape)
        self.assertEqual(var.inplace_version, 0)

        view_var = self.view_api_processing(var)
        self.assertEqual(view_var.inplace_version, 0)

        var[0] = 2.
        self.assertEqual(var.inplace_version, 1)
        self.assertEqual(view_var.inplace_version, 1)

        view_var_2 = self.view_api_processing(var)
        self.assertEqual(view_var_2.inplace_version, 1)

        var[0] = 3.
        self.assertEqual(view_var.inplace_version, 2)
        self.assertEqual(view_var_2.inplace_version, 2)

    def test_backward_error(self):
        # Raises an error because the in-place write below invalidates the
        # forward value of var_b that the gradient computation needs.
        with paddle.fluid.dygraph.guard():
            var_a = paddle.ones(shape=self.input_shape, dtype="float32")
            var_a.stop_gradient = False

            var_b = var_a**2

            # Here, the gradient computation will use the value of var_b
            var_c = var_b**2
            view_var_b = self.view_api_processing(var_b)
            view_var_b[0] = 2.  # var_b is modified inplace

            loss = paddle.nn.functional.relu(var_c)
            with self.assertRaisesRegexp(
                    RuntimeError,
                    "received tensor_version:{} != wrapper_version_snapshot:{}".
                    format(1, 0)):
                loss.backward()


class TestUnsqueezeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
    def init_shape(self):
        self.input_shape = [2, 3]
        self.output_shape = [2, 3, 1]

    def view_api_processing(self, var):
        return paddle.unsqueeze(var, -1)


class TestReshapeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
    def init_shape(self):
        self.input_shape = [3, 4]
        self.output_shape = [2, 2, 3]

    def view_api_processing(self, var):
        return paddle.reshape(var, [2, 2, 3])


class TestFlattenDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
    def init_shape(self):
        self.input_shape = [3, 4]
        self.output_shape = [12]

    def view_api_processing(self, var):
        return paddle.flatten(var)


if __name__ == "__main__":
    unittest.main()
@@ -167,6 +167,10 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
    Flattens a contiguous range of axes in a tensor according to start_axis and stop_axis.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``flatten_clone_x = x.flatten().clone()``.

    For Example:

    .. code-block:: text
@@ -219,12 +223,16 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
            import paddle

            image_shape = (2, 3, 4, 4)

            x = paddle.arange(end=image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3])
            img = paddle.reshape(x, image_shape)

            out = paddle.flatten(img, start_axis=1, stop_axis=2)
            # out shape is [2, 12, 4]

            # out shares data with img in dygraph mode
            img[0, 0, 0, 0] = -1
            print(out[0, 0, 0])  # [-1]
    """
    if not isinstance(x, Variable):
        raise ValueError("The input x should be a Tensor")
@@ -479,6 +487,10 @@ def split(x, num_or_sections, axis=0, name=None):
def squeeze(x, axis=None, name=None):
    """
    This OP will squeeze the dimension(s) of size 1 of input tensor x's shape.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``squeeze_clone_x = x.squeeze().clone()``.

    If axis is provided, it will remove the dimension(s) of size 1 at the given axes.
    If the dimension at a given axis is not of size 1, it remains unchanged.
@@ -536,8 +548,14 @@ def squeeze(x, axis=None, name=None):
            x = paddle.rand([5, 1, 10])
            output = paddle.squeeze(x, axis=1)

            print(x.shape)       # [5, 1, 10]
            print(output.shape)  # [5, 10]

            # output shares data with x in dygraph mode
            x[0, 0, 0] = 10.
            print(output[0, 0])  # [10.]
    """
    if axis is None:
        axis = []
@@ -678,6 +696,10 @@ def unsqueeze(x, axis, name=None):
    required argument axis, a dimension or list of dimensions that will be inserted.
    Dimension indices in axis are as seen in the output tensor.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``unsqueeze_clone_x = x.unsqueeze(-1).clone()``.

    Args:
        x (Tensor): The input Tensor to be unsqueezed. Supported data type: float32, float64, bool, int8, int32, int64.
        axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32``.
@@ -706,6 +728,12 @@ def unsqueeze(x, axis, name=None):
            axis = paddle.to_tensor([0, 1, 2])
            out3 = paddle.unsqueeze(x, axis=axis)
            print(out3.shape)  # [1, 1, 1, 5, 10]

            # out1, out2, out3 share data with x in dygraph mode
            x[0, 0] = 10.
            print(out1[0, 0, 0])        # [10.]
            print(out2[0, 0, 0, 0])     # [10.]
            print(out3[0, 0, 0, 0, 0])  # [10.]
    """
@@ -1382,6 +1410,11 @@ def reshape(x, shape, name=None):
    """
    This operator changes the shape of ``x`` without changing its data.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``reshape_clone_x = x.reshape([-1]).clone()``.

    Some tricks exist when specifying the target shape.

    1. -1 means the value of this dimension is inferred from the total element
@@ -1430,16 +1463,24 @@ def reshape(x, shape, name=None):
            x = paddle.rand([2, 4, 6], dtype="float32")
            positive_four = paddle.full([1], 4, "int32")

            out = paddle.reshape(x, [-1, 0, 3, 2])
            print(out)
            # the shape is [2, 4, 3, 2].

            out = paddle.reshape(x, shape=[positive_four, 12])
            print(out)
            # the shape of out is [4, 12].

            shape_tensor = paddle.to_tensor(np.array([8, 6]).astype("int32"))
            out = paddle.reshape(x, shape=shape_tensor)
            print(out)
            # the shape is [8, 6].

            # out shares data with x in dygraph mode
            x[0, 0, 0] = 10.
            print(out[0, 0])
            # the value is [10.]
    """
    return paddle.fluid.layers.reshape(x=x, shape=shape, name=name)