From 1c7001e731099061370447b1e1f0e1d0ba164742 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Mon, 4 Apr 2022 12:14:43 +0800
Subject: [PATCH] Add dropout yaml (#41355)

* add dropout slice yaml

* remove useless code

* fix infer shape error

* skip infrt compile for dropout
---
 paddle/fluid/framework/op_desc.cc             | 11 +++-
 paddle/fluid/framework/op_desc.h              |  2 +-
 paddle/fluid/operators/dropout_op.cc          |  2 +-
 paddle/phi/infermeta/binary.cc                | 20 +++++++
 paddle/phi/infermeta/binary.h                 | 10 ++++
 paddle/phi/infermeta/unary.cc                 | 57 +++++++++++++++----
 paddle/phi/infermeta/unary.h                  | 11 +++-
 python/paddle/fluid/backward.py               |  1 +
 python/paddle/fluid/layers/nn.py              | 17 ++----
 .../fluid/tests/unittests/test_dropout_op.py  | 31 ++++++++++
 .../fluid/tests/unittests/test_slice_op.py    | 26 +++++++++
 python/paddle/nn/functional/common.py         | 10 +++-
 python/paddle/utils/code_gen/api.yaml         | 19 +++++++
 python/paddle/utils/code_gen/backward.yaml    | 21 +++++++
 tools/infrt/skipped_phi_api.json              |  2 +-
 15 files changed, 209 insertions(+), 31 deletions(-)

diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index a02466c04e9..f31fefcfade 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -777,10 +777,17 @@ void OpDesc::CheckAttrs() {
   checker->Check(&attrs_);
 }
 
-void OpDesc::InferShape(const BlockDesc &block) const {
+void OpDesc::InferShape(const BlockDesc &block) {
   try {
     VLOG(3) << "CompileTime infer shape on " << Type();
-    auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
+    auto &op_info = OpInfoMap::Instance().Get(this->Type());
+    auto *checker = op_info.Checker();
+    if (checker != nullptr) {
+      // set default value here
+      VLOG(10) << "begin to check attribute of " << Type();
+      checker->Check(&attrs_);
+    }
+    auto &infer_shape = op_info.infer_shape_;
     PADDLE_ENFORCE_EQ(
         static_cast<bool>(infer_shape), true,
         platform::errors::NotFound(
diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h
index 82e15d40bee..0afe6796dad 100644
--- a/paddle/fluid/framework/op_desc.h
+++ b/paddle/fluid/framework/op_desc.h
@@ -142,7 +142,7 @@ class OpDesc {
 
   void CheckAttrs();
 
-  void InferShape(const BlockDesc &block) const;
+  void InferShape(const BlockDesc &block);
 
   void InferVarType(BlockDesc *block) const;
 
diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc
index 3d9950902ac..8d033ea3194 100644
--- a/paddle/fluid/operators/dropout_op.cc
+++ b/paddle/fluid/operators/dropout_op.cc
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/infermeta/unary.h"
+#include "paddle/phi/infermeta/binary.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc
index ab13df081aa..60db5d342b8 100644
--- a/paddle/phi/infermeta/binary.cc
+++ b/paddle/phi/infermeta/binary.cc
@@ -776,6 +776,26 @@ void DistInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }
 
+void DropoutInferMeta(const MetaTensor& x,
+                      paddle::optional<const MetaTensor&> seed_tensor,
+                      float p,
+                      bool is_test,
+                      const std::string& mode,
+                      int seed,
+                      bool fix_seed,
+                      MetaTensor* out,
+                      MetaTensor* mask) {
+  auto x_dims = x.dims();
+  out->set_dims(x_dims);
+  out->share_lod(x);
+  out->set_dtype(x.dtype());
+
+  if (mask != nullptr) {
+    mask->set_dims(x_dims);
+    mask->set_dtype(DataType::UINT8);
+  }
+}
+
 void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
   auto x_dims = x.dims();
   auto x_rank = static_cast<size_t>(x_dims.size());
diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h
index 3fcbf69c35e..296c05756f2 100644
--- a/paddle/phi/infermeta/binary.h
+++ b/paddle/phi/infermeta/binary.h
@@ -124,6 +124,16 @@ void DistInferMeta(const MetaTensor& x,
 
 void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out);
 
+void DropoutInferMeta(const MetaTensor& x,
+                      paddle::optional<const MetaTensor&> seed_tensor,
+                      float p,
+                      bool is_test,
+                      const std::string& mode,
+                      int seed,
+                      bool fix_seed,
+                      MetaTensor* out,
+                      MetaTensor* mask);
+
 void ElementwiseInferMeta(const MetaTensor& x,
                           const MetaTensor& y,
                           MetaTensor* out);
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index 36c192cbf27..e0ea637074c 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -24,6 +24,7 @@ limitations under the License. */
 #include "paddle/phi/core/infermeta_utils.h"
 #include "paddle/phi/kernels/funcs/parse_qr_mode.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
+#include "paddle/phi/kernels/funcs/slice_utils.h"
 #include "paddle/phi/kernels/funcs/strided_slice.h"
 #include "paddle/phi/kernels/funcs/unfold_functor.h"
 #include "paddle/phi/kernels/funcs/unsqueeze.h"
@@ -360,17 +361,6 @@ void DiagonalInferMeta(const MetaTensor& input,
   out->set_dims(phi::make_ddim(out_dims));
 }
 
-void DropoutInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* mask) {
-  auto x_dims = x.dims();
-  out->set_dims(x_dims);
-  out->share_lod(x);
-  out->set_dtype(x.dtype());
-
-  if (mask != nullptr) {
-    mask->set_dims(x_dims);
-  }
-}
-
 void EighInferMeta(const MetaTensor& x,
                    const std::string& uplo,
                    MetaTensor* out_w,
@@ -1738,6 +1728,51 @@ void SizeInferMeta(const MetaTensor& input, MetaTensor* out) {
   out->set_dims({1});
 }
 
+void SliceRawInferMeta(const MetaTensor& input,
+                       const std::vector<int64_t>& axes,
+                       const IntArray& starts_arr,
+                       const IntArray& ends_arr,
+                       const std::vector<int64_t>& infer_flags_t,
+                       const std::vector<int64_t>& decrease_axis,
+                       MetaTensor* out,
+                       MetaConfig config) {
+  auto in_dims = input.dims();
+  PADDLE_ENFORCE_LT(
+      in_dims.size(),
+      7,
+      phi::errors::InvalidArgument("The rank of input should be less than 7."));
+  DDim out_dims(in_dims);
+
+  std::vector<int64_t> infer_flags = infer_flags_t;
+  if (infer_flags.empty()) {
+    // Initialize infer_flags with 1.
+    // To be compatible with other op tests in which infer_flags is not set.
+    infer_flags = std::vector<int64_t>(axes.size(), 1);
+  }
+
+  // 2.1 Check attrs.
+  std::vector<int64_t> starts = starts_arr.GetData();
+  std::vector<int64_t> ends = ends_arr.GetData();
+
+  phi::funcs::CheckAndUpdateSliceAttrs<int64_t>(
+      in_dims, axes, &starts, &ends, nullptr, &infer_flags);
+
+  auto slice_dims = phi::funcs::GetSliceDims<int64_t>(
+      in_dims, axes, starts, ends, nullptr, &infer_flags);
+  if (config.is_runtime) {
+    out_dims = phi::funcs::GetDecreasedDims<int64_t>(
+        slice_dims, decrease_axis, &infer_flags);
+  } else {
+    out_dims = phi::funcs::GetDecreasedDims<int64_t>(
+        slice_dims, decrease_axis, nullptr);
+  }
+
+  out->set_dims(out_dims);
+  if (axes.size() > 0 && axes[0] != 0) {
+    out->share_lod(input);
+  }
+}
+
 void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out) {
   auto dim_x = x.dims();
   auto rank_x = dim_x.size();
diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h
index bda9c83fce1..5106c6f4487 100644
--- a/paddle/phi/infermeta/unary.h
+++ b/paddle/phi/infermeta/unary.h
@@ -80,8 +80,6 @@ void DiagInferMeta(const MetaTensor& x,
 void DiagonalInferMeta(
     const MetaTensor& input, int offset, int axis1, int axis2, MetaTensor* out);
 
-void DropoutInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* mask);
-
 void EighInferMeta(const MetaTensor& x,
                    const std::string& uplo,
                    MetaTensor* out_w,
@@ -271,6 +269,15 @@ void ShardIndexInferMeta(const MetaTensor& in,
 
 void SizeInferMeta(const MetaTensor& input, MetaTensor* out);
 
+void SliceRawInferMeta(const MetaTensor& input,
+                       const std::vector<int64_t>& axes,
+                       const IntArray& starts,
+                       const IntArray& ends,
+                       const std::vector<int64_t>& infer_flags,
+                       const std::vector<int64_t>& decrease_axis,
+                       MetaTensor* out,
+                       MetaConfig config = MetaConfig());
+
 void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out);
 
 void SplitInferMeta(const MetaTensor& x_meta,
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 0988f670955..ba7692b442f 100755
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -1337,6 +1337,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
             continue
         grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block)
         # infer_shape and infer_type
+        op_desc.check_attrs()
         op_desc.infer_var_type(block.desc)
         op_desc.infer_shape(block.desc)
 
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index d7ec3276d8b..9f971faed34 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -5141,7 +5141,6 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
             # [-0.33972208 -0.43014923  0.31772556  0.76617881 -0.10761525]]
 
     """
-
     if len(x.shape) == 1:
         axis = 0
     if _non_static_mode():
@@ -11199,18 +11198,15 @@ def slice(input, axes, starts, ends):
         infer_flags = list(1 for i in range(len(axes)))
 
         tmp_tensor_type = core.eager.Tensor
-
         if isinstance(starts, (list, tuple)):
             starts = [
                 item.numpy().item(0)
                 if isinstance(item, tmp_tensor_type) else item
                 for item in starts
             ]
-            attrs += ('starts', starts)
         elif isinstance(starts, tmp_tensor_type):
-            starts_tensor = starts
-            starts.stop_gradient = True
-            infer_flags = list(-1 for i in range(len(axes)))
+            tensor_t = starts.numpy()
+            starts = [ele for ele in tensor_t]
 
         if isinstance(ends, (list, tuple)):
             ends = [
                 item.numpy().item(0)
                 if isinstance(item, tmp_tensor_type) else item for item in ends
             ]
             attrs += ('ends', ends)
         elif isinstance(ends, tmp_tensor_type):
-            ends_tensor = ends
-            ends_tensor.stop_gradient = True
-            infer_flags = list(-1 for i in range(len(axes)))
+            tensor_t = ends.numpy()
+            ends = [ele for ele in tensor_t]
 
-        return _C_ops.slice(input, starts_tensor, ends_tensor, None, None,
-                            'axes', axes, 'infer_flags', infer_flags, *attrs)
+        return _C_ops.final_state_slice(input, axes, starts, ends, infer_flags,
+                                        [])
     else:
         if _in_legacy_dygraph():
             attrs = ()
diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py
index 09712005d41..d8a4eb8f45f 100644
--- a/python/paddle/fluid/tests/unittests/test_dropout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -22,8 +22,11 @@ import paddle
 import paddle.static as static
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
+from paddle.fluid.framework import _test_eager_guard
 import os
 
+from paddle import _C_ops
+
 
 class TestDropoutOp(OpTest):
     def setUp(self):
@@ -960,6 +963,19 @@ class TestDropoutBackward(unittest.TestCase):
                     np.array_equal(input.gradient(
                     ), self.cal_grad_downscale_in_infer(mask.numpy())))
 
+    def test_backward_downscale_in_infer_eager(self):
+        for place in self.places:
+            with fluid.dygraph.guard(place):
+                with _test_eager_guard():
+                    input = paddle.uniform([40, 40], dtype="float32")
+                    input.stop_gradient = False
+                    out, mask = _C_ops.final_state_dropout(
+                        input, None, 0.5, False, "downgrade_in_infer", 0, False)
+                    out.backward()
+                    self.assertTrue(
+                        np.array_equal(input.gradient(
+                        ), self.cal_grad_downscale_in_infer(mask.numpy())))
+
     def test_backward_upscale_train(self):
         for place in self.places:
             with fluid.dygraph.guard(place):
@@ -976,6 +992,21 @@ class TestDropoutBackward(unittest.TestCase):
                     np.allclose(input.gradient(
                     ), self.cal_grad_upscale_train(mask.numpy(), prob)))
 
+    def test_backward_upscale_train_eager(self):
+        for place in self.places:
+            with fluid.dygraph.guard(place):
+                with _test_eager_guard():
+                    prob = 0.5
+                    input = paddle.uniform([40, 40], dtype="float32")
+                    input.stop_gradient = False
+                    out, mask = _C_ops.final_state_dropout(
+                        input, None, 0.5, False, "upscale_in_train", 0, False)
+                    out.backward()
+
+                    self.assertTrue(
+                        np.allclose(input.gradient(
+                        ), self.cal_grad_upscale_train(mask.numpy(), prob)))
+
     def test_backward_upscale_train_2(self):
         for place in self.places:
             with fluid.dygraph.guard(place):
diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py
index 71869b96aed..a565bba3041 100644
--- a/python/paddle/fluid/tests/unittests/test_slice_op.py
+++ b/python/paddle/fluid/tests/unittests/test_slice_op.py
@@ -21,6 +21,7 @@ from op_test import OpTest, convert_float_to_uint16
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle
+from paddle.fluid.framework import _test_eager_guard
 
 paddle.enable_static()
 
@@ -599,6 +600,31 @@ class TestSliceApiWithTensor(unittest.TestCase):
             self.assertTrue(np.array_equal(y_paddle.numpy(), y_np))
 
 
+class TestSliceApiEager(unittest.TestCase):
+    def test_slice_api(self):
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                a = paddle.rand(shape=[4, 5, 6], dtype='float32')
+                a.stop_gradient = False
+                axes = [0, 1, 2]
+                starts = [-3, 0, 2]
+                ends = [3, 2, 4]
+                a_1 = paddle.slice(a, axes=axes, starts=starts, ends=ends)
+
+                a_2 = paddle.slice(
+                    a,
+                    axes=axes,
+                    starts=paddle.to_tensor(starts),
+                    ends=paddle.to_tensor(ends))
+
+                a_1.backward()
+                grad_truth = paddle.zeros_like(a)
+                grad_truth[-3:3, 0:2, 2:4] = 1
+                self.assertTrue(np.array_equal(grad_truth, a.gradient()))
+
+                self.assertTrue(np.allclose(a_1.numpy(), a[-3:3, 0:2, 2:4]))
+
+
 class TestSliceApiWithLoDTensorArray(unittest.TestCase):
     def setUp(self):
         self.shape = (3, 4)
diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py
index 131d31aa024..74df8f6ed5c 100644
--- a/python/paddle/nn/functional/common.py
+++ b/python/paddle/nn/functional/common.py
@@ -28,7 +28,7 @@ from ...tensor import clip
 from ...tensor import sum
 from ...tensor import sqrt
 from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
-from ...fluid.framework import _varbase_creator, _in_legacy_dygraph, in_dygraph_mode
+from ...fluid.framework import _varbase_creator, _in_legacy_dygraph, in_dygraph_mode, _non_static_mode
 from ...fluid import dygraph_utils
 from ...fluid import layers
 
@@ -895,9 +895,15 @@ def dropout(x,
         seed = None
         mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode  #semantic transfer
 
-        if in_dynamic_mode():
+        if _non_static_mode():
             if default_main_program().random_seed != 0:
                 seed = default_main_program().random_seed
+
+            if in_dygraph_mode():
+                out, mask = _C_ops.final_state_dropout( x, None, p, not training, mode, \
+                    seed if seed is not None else 0, seed is not None)
+
+                return out
             out, mask = _C_ops.dropout(
                 x, 'dropout_prob', p, 'is_test', not training, 'fix_seed',
                 seed is not None, 'seed', seed
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 4f05f107bc2..2b0c562dbf9 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -463,6 +463,16 @@
   kernel :
     func : dot
 
+- api : dropout
+  args : (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed)
+  output : Tensor(out), Tensor(mask)
+  infer_meta :
+    func : DropoutInferMeta
+  kernel :
+    func : dropout
+  optional : seed_tensor
+  backward : dropout_grad
+
 # eigh
 - api : eigh
   args : (Tensor x, str uplo)
@@ -1504,6 +1514,15 @@
   kernel :
     func : size
 
+- api : slice
+  args : (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis)
+  output : Tensor
+  infer_meta :
+    func : SliceRawInferMeta
+  kernel :
+    func : slice
+  backward : slice_grad
+
 # soft_shrink
 - api : soft_shrink
   args : (Tensor x, float lambda)
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index db1fe6cdf52..cbcfc02ea09 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -301,6 +301,17 @@
   kernel :
     func : divide_grad
 
+- backward_api : dropout_grad
+  forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask)
+  args : (Tensor mask, Tensor out_grad, float p, bool is_test, str mode)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [out_grad]
+  kernel :
+    func : dropout_grad
+  optional : seed_tensor
+
 - backward_api : eigh_grad
   forward : eigh (Tensor x, str uplo) -> Tensor(out_w), Tensor(out_v)
   args : (Tensor out_w, Tensor out_v, Tensor out_w_grad, Tensor out_v_grad)
@@ -1054,6 +1065,16 @@
   kernel :
     func : sinh_grad
 
+- backward_api : slice_grad
+  forward : slice (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(out)
+  args : (Tensor input, Tensor out_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis)
+  output : Tensor(input_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [input]
+  kernel :
+    func : slice_grad
+
 - backward_api : soft_shrink_grad
   forward : soft_shrink (Tensor x, float lambda) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, float lambda)
diff --git a/tools/infrt/skipped_phi_api.json b/tools/infrt/skipped_phi_api.json
index eef57a2d6b7..74cb6fb0e53 100644
--- a/tools/infrt/skipped_phi_api.json
+++ b/tools/infrt/skipped_phi_api.json
@@ -1,4 +1,4 @@
 {
-"phi_apis":["conj", "nll_loss", "flatten"],
+"phi_apis":["conj", "nll_loss", "dropout", "flatten"],
 "phi_kernels":["equal_all"]
 }
-- 
GitLab
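
Usage note: a minimal sketch of the eager-mode calls this patch wires up, mirroring the new unit tests in test_dropout_op.py and test_slice_op.py above. It assumes a Paddle build from this branch with eager mode available; the _C_ops.final_state_* functions are internal bindings generated from the yaml entries, not a stable public API.

    import numpy as np
    import paddle
    from paddle import _C_ops
    from paddle.fluid.framework import _test_eager_guard

    with paddle.fluid.dygraph.guard():
        with _test_eager_guard():
            # dropout signature: (x, seed_tensor, p, is_test, mode, seed, fix_seed)
            x = paddle.uniform([40, 40], dtype="float32")
            x.stop_gradient = False
            out, mask = _C_ops.final_state_dropout(
                x, None, 0.5, False, "upscale_in_train", 0, False)
            out.backward()
            # In upscale_in_train mode kept elements are scaled by 1/(1-p),
            # so the gradient w.r.t. x reduces to mask / (1 - p).
            assert np.allclose(x.gradient(),
                               mask.numpy().astype("float32") / 0.5)

            # slice signature: (input, axes, starts, ends, infer_flags, decrease_axis)
            a = paddle.rand(shape=[4, 5, 6], dtype="float32")
            a_1 = _C_ops.final_state_slice(a, [0, 1, 2], [-3, 0, 2], [3, 2, 4],
                                           [1, 1, 1], [])
            assert np.allclose(a_1.numpy(), a[-3:3, 0:2, 2:4].numpy())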