From da16b33f2e2be9b1a144267ab506d78824aed6fc Mon Sep 17 00:00:00 2001
From: pangyoki
Date: Sat, 9 Jan 2021 18:36:39 +0800
Subject: [PATCH] add View(reuse allocation) strategy on squeeze, unsqueeze,
 reshape, flatten op (#29913)

* add view strategy on squeeze,unsqueeze,reshape,flatten

* add squeeze unittest

* add unittests

* use View strategy as name rather than Reuse Allocation

* fix view api doc

* fix format

* use core.ops when input of reshape2 is Tensor

* fix test_cross_entropy_loss error because of reshape2

* delete selected_rows

* change op_function

* little change

* solve HandleViewBetweenInputAndOutput
---
 paddle/fluid/pybind/op_function.h            |  26 ++++
 paddle/fluid/pybind/op_function_generator.cc |  33 ++++-
 .../test_view_op_reuse_allocation.py         | 118 ++++++++++++++++++
 python/paddle/tensor/manipulation.py         |  45 ++++++-
 4 files changed, 219 insertions(+), 3 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py

diff --git a/paddle/fluid/pybind/op_function.h b/paddle/fluid/pybind/op_function.h
index 1e20ac958b..0c45753121 100644
--- a/paddle/fluid/pybind/op_function.h
+++ b/paddle/fluid/pybind/op_function.h
@@ -147,6 +147,32 @@ ConstructDuplicableOutput(const size_t num) {
   }
   return res;
 }
+
+static inline void HandleViewBetweenInputAndOutput(
+    const std::shared_ptr<imperative::VarBase>& input_var,
+    const std::shared_ptr<imperative::VarBase>& view_output_var) {
+  PADDLE_ENFORCE_EQ(
+      input_var->Var().IsInitialized(), true,
+      platform::errors::InvalidArgument("Tensor %s has not been initialized!",
+                                        input_var->Name()));
+
+  if (input_var->Var().IsType<framework::LoDTensor>()) {
+    const auto& input_tensor = input_var->Var().Get<framework::LoDTensor>();
+    PADDLE_ENFORCE_EQ(
+        input_tensor.IsInitialized(), true,
+        platform::errors::InvalidArgument(
+            "LoDTensor %s has not been initialized!", input_var->Name()));
+
+    auto* view_output_tensor =
+        view_output_var->MutableVar()->GetMutable<framework::LoDTensor>();
+    view_output_tensor->ShareDataWith(input_tensor);
+    view_output_tensor->ShareInplaceVersionCounterWith(input_tensor);
+
+    VLOG(3) << "Perform View between Output Var(" << view_output_var->Name()
+            << ") and Input Var(" << input_var->Name()
+            << "), share allocation and inplace version.";
+  }
+}
 }  // namespace pybind
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
index b011511487..349162c2e5 100644
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -139,6 +139,19 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
     {"rnn", {"DropoutState"}},
 };
 
+// NOTE(pangyoki): Tensor View Strategy.
+// In this case, a new output varbase will be created, and this varbase will
+// reuse the input varbase's allocation.
+// view_op_map maps each view op name to a pair of varbase names: the output
+// varbase (the second element) reuses the allocation of the input varbase
+// (the first element).
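+// For example, the entry {"squeeze2", {"X", "Out"}} below reads: for the
+// squeeze2 op, the output varbase "Out" reuses the allocation of the input
+// varbase "X".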
+std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
+    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
+    {"unsqueeze2", {"X", "Out"}},
+    {"reshape2", {"X", "Out"}},
+    {"flatten_contiguous_range", {"X", "Out"}},
+};
+
 // clang-format off
 const char* OUT_INITIALIZER_TEMPLATE =
     R"({"%s", {std::shared_ptr<imperative::VarBase>(new imperative::VarBase(tracer->GenerateUniqueName()))}})";
@@ -194,6 +207,11 @@ const char* RETURN_TEMPLATE = R"(outs["%s"][0])";
 const char* FUNCTION_ARGS = R"(%s, const py::args& args)";
 const char* FUNCTION_ARGS_NO_INPUT = R"(const py::args& args)";
 
+const char* HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT = R"(
+  if (ins.count("%s") && outs.count("%s")) {
+    HandleViewBetweenInputAndOutput(ins["%s"][0], outs["%s"][0]);
+  })";
+
 const char* OP_FUNCTION_TEMPLATE =
     R"(
 %s %s(%s)
@@ -230,6 +248,10 @@ static inline bool FindPassingOutsMap(const std::string& op_type,
   return op_passing_outs_map[op_type].count(out_name);
 }
 
+static inline bool FindViewOpMap(const std::string& op_type) {
+  return view_op_map.count(op_type);
+}
+
 static inline std::string TempName(const std::string& name) {
   return name + '_';
 }
@@ -260,6 +282,7 @@ GenerateOpFunctions(const std::string& module_name) {
     int arg_idx = 0;
     int input_args_num = 0;
     std::string ins_cast_str = "";
+    std::string view_strategy_str = "";
     for (auto& input : op_proto->inputs()) {
       auto& in_name = input.name();
       // skip those dispensable inputs, like ResidualData in conv2d
@@ -375,6 +398,13 @@ GenerateOpFunctions(const std::string& module_name) {
       return_str.pop_back();
     }
     outs_initializer += "}";
+    if (FindViewOpMap(op_type)) {
+      std::string view_input_name = view_op_map[op_type].first;
+      std::string view_output_name = view_op_map[op_type].second;
+      view_strategy_str += paddle::string::Sprintf(
+          HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, view_input_name,
+          view_output_name, view_input_name, view_output_name);
+    }
     if (outs_num == 0) {
       return_type = "void";
     }
@@ -394,7 +424,8 @@ GenerateOpFunctions(const std::string& module_name) {
     auto op_function_str = paddle::string::Sprintf(
         OP_FUNCTION_TEMPLATE, return_type, func_name, function_args,
         ins_cast_str, op_type, input_args_num, outs_initializer,
-        ins_initializer, ins_initializer_with_null + outs_initializer_with_null,
+        ins_initializer, ins_initializer_with_null +
+            outs_initializer_with_null + view_strategy_str,
         op_type, return_str);
 
     // generate pybind item
diff --git a/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
new file mode 100644
index 0000000000..9cabcf49bc
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
@@ -0,0 +1,118 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import unittest
+
+import numpy as np
+
+from op_test import OpTest
+import paddle
+
+
+# NOTE(pangyoki): Tensor View Strategy.
+# Refer to `op_function_generator.cc`.
+# For a view op, a new output varbase will be created, and this varbase will
+# reuse the input varbase's allocation.
+# View APIs include: `squeeze`, `unsqueeze`, `reshape`, `flatten`, `detach`
+class TestDygraphViewReuseAllocation(unittest.TestCase):
+    def setUp(self):
+        self.init_shape()
+
+    def init_shape(self):
+        self.input_shape = [2, 3, 1]
+        self.output_shape = [2, 3]
+
+    def view_api_processing(self, var):
+        return paddle.squeeze(var)
+
+    def test_view_api(self):
+        var = paddle.rand(self.input_shape)
+        view_var = self.view_api_processing(var)
+        view_var[0] = 2.
+        self.assertEqual(var.shape, self.input_shape)
+        self.assertEqual(view_var.shape, self.output_shape)
+
+        var_numpy = var.numpy().reshape(self.output_shape)
+        view_var_numpy = view_var.numpy()
+        self.assertTrue(np.array_equal(var_numpy, view_var_numpy))
+
+    def test_forward_version(self):
+        var = paddle.rand(self.input_shape)
+        self.assertEqual(var.inplace_version, 0)
+        view_var = self.view_api_processing(var)
+        self.assertEqual(view_var.inplace_version, 0)
+
+        var[0] = 2.
+        self.assertEqual(var.inplace_version, 1)
+        self.assertEqual(view_var.inplace_version, 1)
+
+        view_var_2 = self.view_api_processing(var)
+        self.assertEqual(view_var_2.inplace_version, 1)
+
+        var[0] = 3.
+        self.assertEqual(view_var.inplace_version, 2)
+        self.assertEqual(view_var_2.inplace_version, 2)
+
+    def test_backward_error(self):
+        # It raises an error because the inplace operator will result
+        # in incorrect gradient computation.
+        with paddle.fluid.dygraph.guard():
+            var_a = paddle.ones(shape=self.input_shape, dtype="float32")
+            var_a.stop_gradient = False
+
+            var_b = var_a**2
+
+            # Here, the gradient computation will use the value of var_b
+            var_c = var_b**2
+            view_var_b = self.view_api_processing(var_b)
+            view_var_b[0] = 2.  # var_b is modified inplace
+
+            loss = paddle.nn.functional.relu(var_c)
+            with self.assertRaisesRegexp(
+                    RuntimeError,
+                    "received tensor_version:{} != wrapper_version_snapshot:{}".
+                    format(1, 0)):
+                loss.backward()
+
+
+class TestUnsqueezeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
+    def init_shape(self):
+        self.input_shape = [2, 3]
+        self.output_shape = [2, 3, 1]
+
+    def view_api_processing(self, var):
+        return paddle.unsqueeze(var, -1)
+
+
+class TestReshapeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
+    def init_shape(self):
+        self.input_shape = [3, 4]
+        self.output_shape = [2, 2, 3]
+
+    def view_api_processing(self, var):
+        return paddle.reshape(var, [2, 2, 3])
+
+
+class TestFlattenDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
+    def init_shape(self):
+        self.input_shape = [3, 4]
+        self.output_shape = [12]
+
+    def view_api_processing(self, var):
+        return paddle.flatten(var)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index 5aa4e76b97..adb3f5a3c5 100644
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -167,6 +167,10 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
     Flattens a contiguous range of axes in a tensor according to start_axis
     and stop_axis.
 
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode. If you want a copy, please
+    use `Tensor.clone` like ``flatten_clone_x = x.flatten().clone()``.
+
     For Example:
 
     .. code-block:: text
@@ -219,12 +223,16 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
             import paddle
 
             image_shape=(2, 3, 4, 4)
-
+
             x = paddle.arange(end=image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3])
             img = paddle.reshape(x, image_shape)
-
+
             out = paddle.flatten(img, start_axis=1, stop_axis=2)
             # out shape is [2, 12, 4]
+
+            # out shares data with img in dygraph mode
+            img[0, 0, 0, 0] = -1
+            print(out[0, 0, 0])  # [-1]
     """
     if not (isinstance(x, Variable)):
         raise ValueError("The input x should be a Tensor")
@@ -479,6 +487,10 @@ def split(x, num_or_sections, axis=0, name=None):
 def squeeze(x, axis=None, name=None):
     """
     This OP will squeeze the dimension(s) of size 1 of input tensor x's shape.
+
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode. If you want a copy, please
+    use `Tensor.clone` like ``squeeze_clone_x = x.squeeze().clone()``.
 
     If axis is provided, it will remove the dimension(s) by given axis that of size 1.
     If the dimension of given axis is not of size 1, the dimension remain unchanged.
@@ -536,8 +548,14 @@ def squeeze(x, axis=None, name=None):
             x = paddle.rand([5, 1, 10])
             output = paddle.squeeze(x, axis=1)
+
+            print(x.shape)  # [5, 1, 10]
             print(output.shape)  # [5, 10]
 
+            # output shares data with x in dygraph mode
+            x[0, 0, 0] = 10.
+            print(output[0, 0])  # [10.]
+
     """
     if axis is None:
         axis = []
@@ -678,6 +696,10 @@ def unsqueeze(x, axis, name=None):
     required argument axis, a dimension or list of dimensions that will be inserted.
     Dimension indices in axis are as seen in the output tensor.
 
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode. If you want a copy, please
+    use `Tensor.clone` like ``unsqueeze_clone_x = x.unsqueeze(-1).clone()``.
+
     Args:
         x (Tensor): The input Tensor to be unsqueezed. Supported data type: float32, float64, bool, int8, int32, int64.
         axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32``.
@@ -706,6 +728,12 @@ def unsqueeze(x, axis, name=None):
             axis = paddle.to_tensor([0, 1, 2])
             out3 = paddle.unsqueeze(x, axis=axis)
             print(out3.shape)  # [1, 1, 1, 5, 10]
+
+            # out1, out2, out3 share data with x in dygraph mode
+            x[0, 0] = 10.
+            print(out1[0, 0, 0])  # [10.]
+            print(out2[0, 0, 0, 0])  # [10.]
+            print(out3[0, 0, 0, 0, 0])  # [10.]
     """
 
@@ -1382,6 +1410,11 @@ def reshape(x, shape, name=None):
     """
     This operator changes the shape of ``x`` without changing its data.
 
+    Note that the output Tensor will share data with the original Tensor and
+    doesn't have a Tensor copy in ``dygraph`` mode.
+    If you want a copy, please use `Tensor.clone` like
+    ``reshape_clone_x = x.reshape([-1]).clone()``.
+
     Some tricks exist when specifying the target shape.
 
     1. -1 means the value of this dimension is inferred from the total element
@@ -1430,16 +1463,24 @@ def reshape(x, shape, name=None):
 
             x = paddle.rand([2, 4, 6], dtype="float32")
             positive_four = paddle.full([1], 4, "int32")
+
             out = paddle.reshape(x, [-1, 0, 3, 2])
             print(out)
             # the shape is [2,4,3,2].
+
             out = paddle.reshape(x, shape=[positive_four, 12])
             print(out)
             # the shape of out_2 is [4, 12].
+
             shape_tensor = paddle.to_tensor(np.array([8, 6]).astype("int32"))
             out = paddle.reshape(x, shape=shape_tensor)
             print(out)
             # the shape is [8, 6].
+
+            # out shares data with x in dygraph mode
+            x[0, 0, 0] = 10.
+            print(out[0, 0])
+            # the value is [10.]
+ """ return paddle.fluid.layers.reshape(x=x, shape=shape, name=name) -- GitLab