Unverified commit 7c943a65, authored by pangyoki, committed by GitHub

[Cherry-pick PR 29913], add View (reuse allocation) strategy on squeeze, unsqueeze, reshape, flatten op (#29913) (#30258)

* add view strategy on squeeze, unsqueeze, reshape, flatten

* add squeeze unittest

* add unittests

* use View strategy as name rather than Reuse Allocation

* fix view api doc

* fix format

* use core.ops when input of reshape2 is Tensor

* fix test_cross_entropy_loss error because of reshape2

* delete selected_rows

* change op_function

* little change

* solve HandleViewBetweenInputAndOutput
Parent afbc6367
@@ -147,6 +147,32 @@ ConstructDuplicableOutput(const size_t num) {
  }
  return res;
}

static inline void HandleViewBetweenInputAndOutput(
    const std::shared_ptr<imperative::VarBase>& input_var,
    const std::shared_ptr<imperative::VarBase>& view_output_var) {
  PADDLE_ENFORCE_EQ(
      input_var->Var().IsInitialized(), true,
      platform::errors::InvalidArgument("Tensor %s has not been initialized!",
                                        input_var->Name()));
  if (input_var->Var().IsType<framework::LoDTensor>()) {
    const auto& input_tensor = input_var->Var().Get<framework::LoDTensor>();
    PADDLE_ENFORCE_EQ(
        input_tensor.IsInitialized(), true,
        platform::errors::InvalidArgument(
            "LoDTensor %s has not been initialized!", input_var->Name()));
    auto* view_output_tensor =
        view_output_var->MutableVar()->GetMutable<framework::LoDTensor>();
    view_output_tensor->ShareDataWith(input_tensor);
    view_output_tensor->ShareInplaceVersionCounterWith(input_tensor);
    VLOG(3) << "Perform View between Output Var(" << view_output_var->Name()
            << ") and Input Var(" << input_var->Name()
            << "), share allocation and inplace version.";
  }
}
} // namespace pybind
} // namespace paddle
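In dygraph mode, the effect of HandleViewBetweenInputAndOutput above is directly observable from Python: the view op's output reuses the input's allocation, and because the inplace version counter is shared, an in-place write to either tensor bumps both versions. A minimal sketch of that behavior (assuming Paddle 2.0+ with dygraph mode enabled by default; distilled from the unit tests added later in this commit):

import paddle

x = paddle.rand([2, 3, 1])
y = paddle.squeeze(x)  # lowers to squeeze2, a registered view op

x[0, 0, 0] = 42.  # in-place write to the base tensor

print(y[0, 0])            # reflects the write through the shared allocation
print(x.inplace_version)  # 1
print(y.inplace_version)  # 1, via the shared version counter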
@@ -133,6 +133,19 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
    {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}},
};
// NOTE(pangyoki): Tensor View Strategy.
// In this case, a new output varbase will be created, and this varbase will
// reuse the input varbase's allocation.
// The map below maps each view op name to an (input name, output name) pair
// that records which output varbase shares the input varbase's allocation.
std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
    {"unsqueeze2", {"X", "Out"}},
    {"reshape2", {"X", "Out"}},
    {"flatten_contiguous_range", {"X", "Out"}},
};
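These four ops back the user-facing paddle.squeeze, paddle.unsqueeze, paddle.reshape, and paddle.flatten APIs. A short sketch (assuming dygraph mode) showing that each returns a view of its input, and that Tensor.clone is the documented opt-out (see the docstring changes below):

import paddle

x = paddle.rand([2, 3, 1])
views = [
    paddle.squeeze(x),        # squeeze2
    paddle.unsqueeze(x, -1),  # unsqueeze2
    paddle.reshape(x, [6]),   # reshape2
    paddle.flatten(x),        # flatten_contiguous_range
]

x[0, 0, 0] = -1.  # visible through every view above
for v in views:
    print(v.numpy().flatten()[0])  # -1.0 each time

copy = paddle.flatten(x).clone()  # an independent copy, not a view
x[0, 0, 0] = 7.
print(copy.numpy()[0])  # still -1.0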
// clang-format off
const char* OUT_INITIALIZER_TEMPLATE =
    R"({"%s", {std::shared_ptr<imperative::VarBase>(new imperative::VarBase(tracer->GenerateUniqueName()))}})";
@@ -188,6 +201,11 @@ const char* RETURN_TEMPLATE = R"(outs["%s"][0])";
const char* FUNCTION_ARGS = R"(%s, const py::args& args)";
const char* FUNCTION_ARGS_NO_INPUT = R"(const py::args& args)";

const char* HandleViewBetweenInputAndOutput = R"(
  if (ins.count("%s") && outs.count("%s")) {
    HandleViewBetweenInputAndOutput(ins["%s"][0], outs["%s"][0]);
  })";
const char* OP_FUNCTION_TEMPLATE =
R"(
%s %s(%s)
@@ -224,6 +242,10 @@ static inline bool FindPassingOutsMap(const std::string& op_type,
  return op_passing_outs_map[op_type].count(out_name);
}

static inline bool FindViewOpMap(const std::string& op_type) {
  return view_op_map.count(op_type);
}

static inline std::string TempName(const std::string& name) {
  return name + '_';
}
@@ -254,6 +276,7 @@ GenerateOpFunctions(const std::string& module_name) {
    int arg_idx = 0;
    int input_args_num = 0;
    std::string ins_cast_str = "";
    std::string view_strategy_str = "";
    for (auto& input : op_proto->inputs()) {
      auto& in_name = input.name();
      // skip those dispensable inputs, like ResidualData in conv2d
@@ -369,6 +392,13 @@ GenerateOpFunctions(const std::string& module_name) {
      return_str.pop_back();
    }
    outs_initializer += "}";

    if (FindViewOpMap(op_type)) {
      std::string view_input_name = view_op_map[op_type].first;
      std::string view_output_name = view_op_map[op_type].second;
      view_strategy_str += paddle::string::Sprintf(
          HandleViewBetweenInputAndOutput, view_input_name, view_output_name,
          view_input_name, view_output_name);
    }

    if (outs_num == 0) {
      return_type = "void";
    }
@@ -388,7 +418,8 @@ GenerateOpFunctions(const std::string& module_name) {
    auto op_function_str = paddle::string::Sprintf(
        OP_FUNCTION_TEMPLATE, return_type, func_name, function_args,
        ins_cast_str, op_type, input_args_num, outs_initializer,
        ins_initializer, ins_initializer_with_null +
                             outs_initializer_with_null + view_strategy_str,
        op_type, return_str);

    // generate pybind item
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np

from op_test import OpTest
import paddle


# NOTE(pangyoki): Tensor View Strategy.
# Refer to `op_function_generator.py`.
# For view ops, a new output varbase will be created, and this varbase will
# reuse the input varbase's allocation.
# View APIs include: `squeeze`, `unsqueeze`, `reshape`, `flatten`, `detach`
class TestDygraphViewReuseAllocation(unittest.TestCase):
    def setUp(self):
        self.init_shape()

    def init_shape(self):
        self.input_shape = [2, 3, 1]
        self.output_shape = [2, 3]

    def view_api_processing(self, var):
        return paddle.squeeze(var)

    def test_view_api(self):
        var = paddle.rand(self.input_shape)
        view_var = self.view_api_processing(var)
        view_var[0] = 2.
        self.assertEqual(var.shape, self.input_shape)
        self.assertEqual(view_var.shape, self.output_shape)

        var_numpy = var.numpy().reshape(self.output_shape)
        view_var_numpy = view_var.numpy()
        self.assertTrue(np.array_equal(var_numpy, view_var_numpy))

    def test_forward_version(self):
        var = paddle.rand(self.input_shape)
        self.assertEqual(var.inplace_version, 0)

        view_var = self.view_api_processing(var)
        self.assertEqual(view_var.inplace_version, 0)

        var[0] = 2.
        self.assertEqual(var.inplace_version, 1)
        self.assertEqual(view_var.inplace_version, 1)

        view_var_2 = self.view_api_processing(var)
        self.assertEqual(view_var_2.inplace_version, 1)

        var[0] = 3.
        self.assertEqual(view_var.inplace_version, 2)
        self.assertEqual(view_var_2.inplace_version, 2)

    def test_backward_error(self):
        # Raises an error because the in-place write below invalidates the
        # forward value of var_b that the gradient computation needs.
        with paddle.fluid.dygraph.guard():
            var_a = paddle.ones(shape=self.input_shape, dtype="float32")
            var_a.stop_gradient = False

            var_b = var_a**2

            # Here, the gradient computation will use the value of var_b
            var_c = var_b**2
            view_var_b = self.view_api_processing(var_b)
            view_var_b[0] = 2.  # var_b is modified inplace

            loss = paddle.nn.functional.relu(var_c)
            with self.assertRaisesRegexp(
                    RuntimeError,
                    "received tensor_version:{} != wrapper_version_snapshot:{}".
                    format(1, 0)):
                loss.backward()


class TestUnsqueezeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
    def init_shape(self):
        self.input_shape = [2, 3]
        self.output_shape = [2, 3, 1]

    def view_api_processing(self, var):
        return paddle.unsqueeze(var, -1)


class TestReshapeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
    def init_shape(self):
        self.input_shape = [3, 4]
        self.output_shape = [2, 2, 3]

    def view_api_processing(self, var):
        return paddle.reshape(var, [2, 2, 3])


class TestFlattenDygraphViewReuseAllocation(TestDygraphViewReuseAllocation):
    def init_shape(self):
        self.input_shape = [3, 4]
        self.output_shape = [12]

    def view_api_processing(self, var):
        return paddle.flatten(var)


if __name__ == "__main__":
    unittest.main()
@@ -167,6 +167,10 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
    Flattens a contiguous range of axes in a tensor according to start_axis and stop_axis.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``flatten_clone_x = x.flatten().clone()``.

    For Example:

    .. code-block:: text
@@ -219,12 +223,16 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
            import paddle

            image_shape = (2, 3, 4, 4)

            x = paddle.arange(end=image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3])
            img = paddle.reshape(x, image_shape)

            out = paddle.flatten(img, start_axis=1, stop_axis=2)
            # out shape is [2, 12, 4]

            # out shares data with img in dygraph mode
            img[0, 0, 0, 0] = -1
            print(out[0, 0, 0])  # [-1]
    """
    if not isinstance(x, Variable):
        raise ValueError("The input x should be a Tensor")
@@ -479,6 +487,10 @@ def split(x, num_or_sections, axis=0, name=None):
def squeeze(x, axis=None, name=None):
    """
    This OP will squeeze the dimension(s) of size 1 of input tensor x's shape.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``squeeze_clone_x = x.squeeze().clone()``.

    If axis is provided, it will remove the dimension(s) of size 1 at the given axes.
    If the dimension at a given axis is not of size 1, it remains unchanged.
@@ -536,8 +548,14 @@ def squeeze(x, axis=None, name=None):
            x = paddle.rand([5, 1, 10])
            output = paddle.squeeze(x, axis=1)

            print(x.shape)       # [5, 1, 10]
            print(output.shape)  # [5, 10]

            # output shares data with x in dygraph mode
            x[0, 0, 0] = 10.
            print(output[0, 0])  # [10.]
    """
    if axis is None:
        axis = []
@@ -678,6 +696,10 @@ def unsqueeze(x, axis, name=None):
    required argument axis, a dimension or list of dimensions that will be inserted.
    Dimension indices in axis are as seen in the output tensor.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``unsqueeze_clone_x = x.unsqueeze(-1).clone()``.

    Args:
        x (Tensor): The input Tensor to be unsqueezed. Supported data type: float32, float64, bool, int8, int32, int64.
        axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32``.
@@ -706,6 +728,12 @@ def unsqueeze(x, axis, name=None):
            axis = paddle.to_tensor([0, 1, 2])
            out3 = paddle.unsqueeze(x, axis=axis)
            print(out3.shape)  # [1, 1, 1, 5, 10]

            # out1, out2, out3 share data with x in dygraph mode
            x[0, 0] = 10.
            print(out1[0, 0, 0])        # [10.]
            print(out2[0, 0, 0, 0])     # [10.]
            print(out3[0, 0, 0, 0, 0])  # [10.]
    """
@@ -1382,6 +1410,11 @@ def reshape(x, shape, name=None):
    """
    This operator changes the shape of ``x`` without changing its data.

    Note that the output Tensor will share data with the original Tensor and doesn't
    have a Tensor copy in ``dygraph`` mode. If you want the Tensor copy version,
    please use `Tensor.clone`, e.g. ``reshape_clone_x = x.reshape([-1]).clone()``.

    Some tricks exist when specifying the target shape.

    1. -1 means the value of this dimension is inferred from the total element
@@ -1430,16 +1463,24 @@ def reshape(x, shape, name=None):
            x = paddle.rand([2, 4, 6], dtype="float32")
            positive_four = paddle.full([1], 4, "int32")

            out = paddle.reshape(x, [-1, 0, 3, 2])
            print(out)
            # the shape is [2, 4, 3, 2].

            out = paddle.reshape(x, shape=[positive_four, 12])
            print(out)
            # the shape of out is [4, 12].

            shape_tensor = paddle.to_tensor(np.array([8, 6]).astype("int32"))
            out = paddle.reshape(x, shape=shape_tensor)
            print(out)
            # the shape is [8, 6].

            # out shares data with x in dygraph mode
            x[0, 0, 0] = 10.
            print(out[0, 0])
            # the value is [10.]
    """
    return paddle.fluid.layers.reshape(x=x, shape=shape, name=name)