From da16b33f2e2be9b1a144267ab506d78824aed6fc Mon Sep 17 00:00:00 2001
From: pangyoki
Date: Sat, 9 Jan 2021 18:36:39 +0800
Subject: [PATCH] add View(reuse allocation) strategy on squeeze, unsqueeze,
 reshape, flatten op (#29913)

* add view strategy on squeeze,unsqueeze,reshape,flatten

* add squeeze unittest

* add unittests

* use View strategy as name rather than Reuse Allocation

* fix view api doc

* fix format

* use core.ops when input of reshape2 is Tensor

* fix test_cross_entropy_loss error because of reshape2

* delete selected_rows

* change op_function

* little change

* solve HandleViewBetweenInputAndOutput
---
 paddle/fluid/pybind/op_function.h            |  26 ++++
 paddle/fluid/pybind/op_function_generator.cc |  33 ++++-
 .../test_view_op_reuse_allocation.py         | 118 ++++++++++++++++++
 python/paddle/tensor/manipulation.py         |  45 ++++++-
 4 files changed, 219 insertions(+), 3 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py

diff --git a/paddle/fluid/pybind/op_function.h b/paddle/fluid/pybind/op_function.h
index 1e20ac958b..0c45753121 100644
--- a/paddle/fluid/pybind/op_function.h
+++ b/paddle/fluid/pybind/op_function.h
@@ -147,6 +147,32 @@ ConstructDuplicableOutput(const size_t num) {
   }
   return res;
 }
+
+static inline void HandleViewBetweenInputAndOutput(
+    const std::shared_ptr<imperative::VarBase>& input_var,
+    const std::shared_ptr<imperative::VarBase>& view_output_var) {
+  PADDLE_ENFORCE_EQ(
+      input_var->Var().IsInitialized(), true,
+      platform::errors::InvalidArgument("Tensor %s has not been initialized!",
+                                        input_var->Name()));
+
+  if (input_var->Var().IsType<framework::LoDTensor>()) {
+    const auto& input_tensor = input_var->Var().Get<framework::LoDTensor>();
+    PADDLE_ENFORCE_EQ(
+        input_tensor.IsInitialized(), true,
+        platform::errors::InvalidArgument(
+            "LoDTensor %s has not been initialized!", input_var->Name()));
+
+    auto* view_output_tensor =
+        view_output_var->MutableVar()->GetMutable<framework::LoDTensor>();
+    view_output_tensor->ShareDataWith(input_tensor);
+    view_output_tensor->ShareInplaceVersionCounterWith(input_tensor);
+
+    VLOG(3) << "Perform View between Output Var(" << view_output_var->Name()
+            << ") and Input Var(" << input_var->Name()
+            << "), share allocation and inplace version.";
+  }
+}
 }  // namespace pybind
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
index b011511487..349162c2e5 100644
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -139,6 +139,19 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
     {"rnn", {"DropoutState"}},
 };
 
+// NOTE(pangyoki): Tensor View Strategy.
+// In this case, a new output varbase will be created, and this varbase will
+// reuse the input varbase's allocation.
+// view_op_map maps each view op name to a pair of varbase names: the output
+// varbase (the second element) reuses the allocation of the input varbase
+// (the first element).
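+// For example, the entry {"squeeze2", {"X", "Out"}} below reads: for the
+// squeeze2 op, the output varbase "Out" reuses the allocation of the input
+// varbase "X".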
+std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
+    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
+    {"unsqueeze2", {"X", "Out"}},
+    {"reshape2", {"X", "Out"}},
+    {"flatten_contiguous_range", {"X", "Out"}},
+};
+
 // clang-format off
 const char* OUT_INITIALIZER_TEMPLATE =
     R"({"%s", {std::shared_ptr<imperative::VarBase>(new imperative::VarBase(tracer->GenerateUniqueName()))}})";
@@ -194,6 +207,11 @@ const char* RETURN_TEMPLATE = R"(outs["%s"][0])";
 const char* FUNCTION_ARGS = R"(%s, const py::args& args)";
 const char* FUNCTION_ARGS_NO_INPUT = R"(const py::args& args)";
 
+const char* HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT = R"(
+  if (ins.count("%s") && outs.count("%s")) {
+    HandleViewBetweenInputAndOutput(ins["%s"][0], outs["%s"][0]);
+  })";
+
 const char* OP_FUNCTION_TEMPLATE =
     R"(
 %s %s(%s)
@@ -230,6 +248,10 @@ static inline bool FindPassingOutsMap(const std::string& op_type,
   return op_passing_outs_map[op_type].count(out_name);
 }
 
+static inline bool FindViewOpMap(const std::string& op_type) {
+  return view_op_map.count(op_type);
+}
+
 static inline std::string TempName(const std::string& name) {
   return name + '_';
 }
@@ -260,6 +282,7 @@ GenerateOpFunctions(const std::string& module_name) {
     int arg_idx = 0;
     int input_args_num = 0;
     std::string ins_cast_str = "";
+    std::string view_strategy_str = "";
     for (auto& input : op_proto->inputs()) {
       auto& in_name = input.name();
       // skip those dispensable inputs, like ResidualData in conv2d
@@ -375,6 +398,13 @@ GenerateOpFunctions(const std::string& module_name) {
       return_str.pop_back();
     }
     outs_initializer += "}";
+    if (FindViewOpMap(op_type)) {
+      std::string view_input_name = view_op_map[op_type].first;
+      std::string view_output_name = view_op_map[op_type].second;
+      view_strategy_str += paddle::string::Sprintf(
+          HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, view_input_name,
+          view_output_name, view_input_name, view_output_name);
+    }
     if (outs_num == 0) {
       return_type = "void";
     }
@@ -394,7 +424,8 @@ GenerateOpFunctions(const std::string& module_name) {
     auto op_function_str = paddle::string::Sprintf(
         OP_FUNCTION_TEMPLATE, return_type, func_name, function_args,
         ins_cast_str, op_type, input_args_num, outs_initializer,
-        ins_initializer, ins_initializer_with_null + outs_initializer_with_null,
+        ins_initializer, ins_initializer_with_null +
+            outs_initializer_with_null + view_strategy_str,
         op_type, return_str);
 
     // generate pybind item
diff --git a/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
new file mode 100644
index 0000000000..9cabcf49bc
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
@@ -0,0 +1,118 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import unittest
+
+import numpy as np
+
+from op_test import OpTest
+import paddle
+
+
+# NOTE(pangyoki): Tensor View Strategy.
+# Refer to `op_function_generator.cc`.
+# For a view op, a new output varbase will be created, and this varbase will
+# reuse the input varbase's allocation.
+# View APIs include: `squeeze`, `unsqueeze`, `reshape`, `flatten`, `detach`
+class TestDygraphViewReuseAllocation(unittest.TestCase):
+    def setUp(self):
+        self.init_shape()
+
+    def init_shape(self):
+        self.input_shape = [2, 3, 1]
+        self.output_shape = [2, 3]
+
+    def view_api_processing(self, var):
+        return paddle.squeeze(var)
+
+    def test_view_api(self):
+        var = paddle.rand(self.input_shape)
+        view_var = self.view_api_processing(var)
+        view_var[0] = 2.
+        self.assertEqual(var.shape, self.input_shape)
+        self.assertEqual(view_var.shape, self.output_shape)
+
+        var_numpy = var.numpy().reshape(self.output_shape)
+        view_var_numpy = view_var.numpy()
+        self.assertTrue(np.array_equal(var_numpy, view_var_numpy))
+
+    def test_forward_version(self):
+        var = paddle.rand(self.input_shape)
+        self.assertEqual(var.inplace_version, 0)
+        view_var = self.view_api_processing(var)
+        self.assertEqual(view_var.inplace_version, 0)
+
+        var[0] = 2.
+        self.assertEqual(var.inplace_version, 1)
+        self.assertEqual(view_var.inplace_version, 1)
+
+        view_var_2 = self.view_api_processing(var)
+        self.assertEqual(view_var_2.inplace_version, 1)
+
+        var[0] = 3.
+        self.assertEqual(view_var.inplace_version, 2)
+        self.assertEqual(view_var_2.inplace_version, 2)
+
+    def test_backward_error(self):
+        # It raises an error because the inplace operator will result
+        # in incorrect gradient computation.
+        with paddle.fluid.dygraph.guard():
+            var_a = paddle.ones(shape=self.input_shape, dtype="float32")
+            var_a.stop_gradient = False
+
+            var_b = var_a**2
+
+            # Here, the gradient computation will use the value of var_b
+            var_c = var_b**2
+            view_var_b = self.view_api_processing(var_b)
+            view_var_b[0] = 2.  # var_b is modified inplace
+
+            loss = paddle.nn.functional.relu(var_c)
+            with self.assertRaisesRegexp(
+                    RuntimeError,
+                    "received tensor_version:{} != wrapper_version_snapshot:{}".
+                    format(1, 0)):
+                loss.backward()
+
+
+class TestUnsqueezeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
+    def init_shape(self):
+        self.input_shape = [2, 3]
+        self.output_shape = [2, 3, 1]
+
+    def view_api_processing(self, var):
+        return paddle.unsqueeze(var, -1)
+
+
+class TestReshapeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
+    def init_shape(self):
+        self.input_shape = [3, 4]
+        self.output_shape = [2, 2, 3]
+
+    def view_api_processing(self, var):
+        return paddle.reshape(var, [2, 2, 3])
+
+
+class TestFlattenDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
+    def init_shape(self):
+        self.input_shape = [3, 4]
+        self.output_shape = [12]
+
+    def view_api_processing(self, var):
+        return paddle.flatten(var)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index 5aa4e76b97..adb3f5a3c5 100644
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -167,6 +167,10 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
     Flattens a contiguous range of axes in a tensor according to start_axis
     and stop_axis.
 
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode. If you want a copy, please
+    use `Tensor.clone` like ``flatten_clone_x = x.flatten().clone()``.
+
     For Example:
 
     .. code-block:: text
@@ -219,12 +223,16 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
             import paddle
 
             image_shape=(2, 3, 4, 4)
-
+
             x = paddle.arange(end=image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3])
             img = paddle.reshape(x, image_shape)
-
+
             out = paddle.flatten(img, start_axis=1, stop_axis=2)
             # out shape is [2, 12, 4]
+
+            # out shares data with img in dygraph mode
+            img[0, 0, 0, 0] = -1
+            print(out[0, 0, 0])  # [-1]
     """
     if not (isinstance(x, Variable)):
         raise ValueError("The input x should be a Tensor")
@@ -479,6 +487,10 @@ def split(x, num_or_sections, axis=0, name=None):
 def squeeze(x, axis=None, name=None):
     """
     This OP will squeeze the dimension(s) of size 1 of input tensor x's shape.
+
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode. If you want a copy, please
+    use `Tensor.clone` like ``squeeze_clone_x = x.squeeze().clone()``.
 
     If axis is provided, it will remove the dimension(s) by given axis that of size 1.
     If the dimension of given axis is not of size 1, the dimension remain unchanged.
@@ -536,8 +548,14 @@ def squeeze(x, axis=None, name=None):
             x = paddle.rand([5, 1, 10])
             output = paddle.squeeze(x, axis=1)
+
+            print(x.shape)  # [5, 1, 10]
             print(output.shape)  # [5, 10]
 
+            # output shares data with x in dygraph mode
+            x[0, 0, 0] = 10.
+            print(output[0, 0])  # [10.]
+
     """
     if axis is None:
         axis = []
@@ -678,6 +696,10 @@ def unsqueeze(x, axis, name=None):
     required argument axis, a dimension or list of dimensions that will be inserted.
     Dimension indices in axis are as seen in the output tensor.
 
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode. If you want a copy, please
+    use `Tensor.clone` like ``unsqueeze_clone_x = x.unsqueeze(-1).clone()``.
+
     Args:
         x (Tensor): The input Tensor to be unsqueezed. Supported data type: float32, float64, bool, int8, int32, int64.
         axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32``.
@@ -706,6 +728,12 @@ def unsqueeze(x, axis, name=None):
             axis = paddle.to_tensor([0, 1, 2])
             out3 = paddle.unsqueeze(x, axis=axis)
             print(out3.shape)  # [1, 1, 1, 5, 10]
+
+            # out1, out2, out3 share data with x in dygraph mode
+            x[0, 0] = 10.
+            print(out1[0, 0, 0])  # [10.]
+            print(out2[0, 0, 0, 0])  # [10.]
+            print(out3[0, 0, 0, 0, 0])  # [10.]
     """
 
@@ -1382,6 +1410,11 @@ def reshape(x, shape, name=None):
     """
     This operator changes the shape of ``x`` without changing its data.
 
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode.
+    If you want a copy, please use `Tensor.clone` like
+    ``reshape_clone_x = x.reshape([-1]).clone()``.
+
     Some tricks exist when specifying the target shape.
 
     1. -1 means the value of this dimension is inferred from the total element
@@ -1430,16 +1463,24 @@ def reshape(x, shape, name=None):
 
             x = paddle.rand([2, 4, 6], dtype="float32")
             positive_four = paddle.full([1], 4, "int32")
+
             out = paddle.reshape(x, [-1, 0, 3, 2])
             print(out)
             # the shape is [2,4,3,2].
+
             out = paddle.reshape(x, shape=[positive_four, 12])
             print(out)
             # the shape of out_2 is [4, 12].
+
             shape_tensor = paddle.to_tensor(np.array([8, 6]).astype("int32"))
             out = paddle.reshape(x, shape=shape_tensor)
             print(out)
             # the shape is [8, 6].
+
+            # out shares data with x in dygraph mode
+            x[0, 0, 0] = 10.
+            print(out[0, 0])
+            # the value is [10.]
+ """ return paddle.fluid.layers.reshape(x=x, shape=shape, name=name) -- GitLab