From f6dea800c5d089a112bac7b6c12a612412e6851a Mon Sep 17 00:00:00 2001
From: xiaoguoguo626807 <100397923+xiaoguoguo626807@users.noreply.github.com>
Date: Fri, 24 Feb 2023 11:56:54 +0800
Subject: [PATCH] 【prim】Slice grad (#50771)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* support prim test in OpTest
* fix cmake
* fix op test
* fix test_input_spec
* disable cinn in reduce_sum unit test
* add bfloat16 dtype for sum
* add approve rules
* polish code
* add clear jit program function
* convert grad out from tensor to numpy
* remove unnecessary code
* add only_prim flag
* fix flag
* fix op test
* add attr
* fix optest comp inplace error
* fix op test
* fix op test with guard
* add initialization of check_comp flag
* fix comp inplace error in op test
* rename check_comp with check_prim and add bfloat16 dtype convert
* rename comp_op_type to prim_op_type
* rename comp to prim
* remove useless code
* skip ci check for only prim
* add no_grad_vars and grad_outputs in prim test
* fix var_dict
* fix op test for only_prim
* fix dy2static bugs
* polish some code
* temp
* modify op test
* except cinn test
* modify bfp16
* modify pad grad
* add pad_grad dtype
* start cinn part

---------

Co-authored-by: Charles-hit
---
 paddle/fluid/operators/slice_op.cc         | 31 ++++++
 paddle/fluid/prim/api/api.yaml             |  2 +
 .../composite_backward_api.h               | 61 ++++++++++++
 paddle/phi/api/yaml/legacy_backward.yaml   |  1 +
 paddle/phi/kernels/cpu/pad_grad_kernel.cc  |  5 +-
 paddle/phi/kernels/cpu/pad_kernel.cc       |  3 +-
 .../fluid/tests/unittests/CMakeLists.txt   |  3 +-
 .../fluid/tests/unittests/test_slice_op.py | 95 +++++++++++++++----
 tools/check_file_diff_approvals.sh         |  1 -
 9 files changed, 179 insertions(+), 23 deletions(-)

diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc
index 426eec0b0e..2519f3f97f 100644
--- a/paddle/fluid/operators/slice_op.cc
+++ b/paddle/fluid/operators/slice_op.cc
@@ -18,6 +18,9 @@ limitations under the License. */
 #include <algorithm>
 
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
+#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
+#include "paddle/fluid/prim/utils/static/desc_tensor.h"
 #include "paddle/phi/kernels/funcs/slice_utils.h"
 
 namespace paddle {
@@ -409,6 +412,34 @@ class SliceOpGradMaker : public framework::SingleGradOpMaker<T> {
   }
 };
 
+class SliceCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
+  using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;
+
+ public:
+  void Apply() override {
+    paddle::experimental::Tensor input = this->GetSingleForwardInput("Input");
+    paddle::experimental::Tensor out_grad = this->GetSingleOutputGrad("Out");
+    paddle::experimental::Tensor input_grad = this->GetSingleInputGrad("Input");
+
+    auto dx_ptr = this->GetOutputPtr(&input_grad);
+    std::string dx_name = this->GetOutputName(input_grad);
+    auto axes = this->Attr<std::vector<int64_t>>("axes");
+    auto starts = this->Attr<std::vector<int64_t>>("starts");
+    auto ends = this->Attr<std::vector<int64_t>>("ends");
+    auto infer_flags = this->Attr<std::vector<int64_t>>("infer_flags");
+    auto decrease_axis = this->Attr<std::vector<int64_t>>("decrease_axis");
+    VLOG(6) << "Running slice_grad composite func";
+    prim::slice_grad<prim::DescTensor>(input,
+                                       out_grad,
+                                       axes,
+                                       paddle::experimental::IntArray(starts),
+                                       paddle::experimental::IntArray(ends),
+                                       infer_flags,
+                                       decrease_axis,
+                                       dx_ptr);
+    this->RecoverOutputName(input_grad, dx_name);
+  }
+};
+
 template <typename T>
 class SliceDoubleOpGradMaker : public framework::SingleGradOpMaker<T> {
  public:
diff --git a/paddle/fluid/prim/api/api.yaml b/paddle/fluid/prim/api/api.yaml
index 67353d342f..430b1a2412 100644
--- a/paddle/fluid/prim/api/api.yaml
+++ b/paddle/fluid/prim/api/api.yaml
@@ -25,3 +25,5 @@
 - scatter_nd_add
 - tile
 - transpose
+- subtract
+- pad
diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
index aecf09f463..c9990fdf7d 100644
--- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
+++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
@@ -323,5 +323,66 @@ void exp_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
   }
 }
 
+template <typename T>
+void slice_grad(const Tensor& input,
+                const Tensor& out_grad,
+                const std::vector<int64_t>& axes,
+                const IntArray& starts,
+                const IntArray& ends,
+                const std::vector<int64_t>& infer_flags,
+                const std::vector<int64_t>& decrease_axis,
+                Tensor* input_grad) {
+  if (input_grad) {
+    size_t rank = input.dims().size();
+    auto out_dims = out_grad.dims();
+    auto in_dims = input.dims();
+
+    auto decrease_size = decrease_axis.size();
+    if (decrease_size > 0) {
+      if (decrease_size == static_cast<size_t>(in_dims.size())) {
+        // all dims decrease
+        out_dims = phi::make_ddim(std::vector<int64_t>(decrease_size, 1));
+      } else {
+        std::vector<int64_t> origin_out_shape(out_dims.size() + decrease_size, -1);
+        for (size_t i = 0; i < decrease_size; ++i) {
+          origin_out_shape[decrease_axis[i]] = 1;
+        }
+
+        int index = 0;
+        for (size_t i = 0; i < origin_out_shape.size(); ++i) {
+          if (origin_out_shape[i] == -1) {
+            origin_out_shape[i] = out_dims[index];
+            ++index;
+          }
+        }
+        out_dims = phi::make_ddim(origin_out_shape);
+      }
+    }
+
+    std::vector<int64_t> offsets(rank, 0);
+    std::vector<int64_t> extents(rank, 0);
+    for (size_t i = 0; i < rank; ++i) {
+      offsets[i] = 0;
+      extents[i] = out_dims[i];
+    }
+
+    for (size_t i = 0; i < axes.size(); ++i) {
+      int axis = axes[i];
+      int64_t start = starts[i] < 0 ? (starts[i] + in_dims[axis]) : starts[i];
+      start = std::max(start, static_cast<int64_t>(0));
+      offsets[axis] = start;
+    }
+
+    std::vector<int> paddings;
+    for (size_t i = 0; i < rank; ++i) {
+      paddings.push_back(offsets[i]);
+      paddings.push_back((in_dims[i] - out_dims[i]) - offsets[i]);
+    }
+
+    auto out_tmp = pad<T>(out_grad, paddings, 0.0);
+    set_output<T>(out_tmp, input_grad);
+  }
+}
+
 }  // namespace prim
 }  // namespace paddle
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index e27c34ad3b..25e8cf2b78 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -1172,6 +1172,7 @@
   param : [input]
   kernel :
     func : slice_grad
+  composite: slice_grad(input, out_grad, axes, starts, ends, infer_flags, decrease_axis)
   backward : slice_double_grad
   no_need_buffer : input
diff --git a/paddle/phi/kernels/cpu/pad_grad_kernel.cc b/paddle/phi/kernels/cpu/pad_grad_kernel.cc
index 67e6da7d0e..af1db8173f 100644
--- a/paddle/phi/kernels/cpu/pad_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/pad_grad_kernel.cc
@@ -24,5 +24,8 @@ PD_REGISTER_KERNEL(pad_grad,
                    phi::PadGradKernel,
                    float,
                    double,
+                   int,
+                   int64_t,
                    phi::dtype::complex<float>,
-                   phi::dtype::complex<double>) {}
+                   phi::dtype::complex<double>,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/pad_kernel.cc b/paddle/phi/kernels/cpu/pad_kernel.cc
index f4a0acdcca..ed0cb2f644 100644
--- a/paddle/phi/kernels/cpu/pad_kernel.cc
+++ b/paddle/phi/kernels/cpu/pad_kernel.cc
@@ -27,4 +27,5 @@ PD_REGISTER_KERNEL(pad,
                    int,
                    int64_t,
                    phi::dtype::complex<float>,
-                   phi::dtype::complex<double>) {}
+                   phi::dtype::complex<double>,
+                   phi::dtype::bfloat16) {}
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index e7c83de419..ba3b1bf902 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1202,7 +1202,8 @@ if($ENV{USE_STANDALONE_EXECUTOR})
                        PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0)
 endif()
 
-set(TEST_CINN_OPS test_softmax_op test_expand_v2_op test_reduce_op)
+set(TEST_CINN_OPS test_softmax_op test_expand_v2_op test_reduce_op
+                  test_slice_op)
 
 foreach(TEST_CINN_OPS ${TEST_CINN_OPS})
   if(WITH_CINN)
diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py
index 157818e794..c5f7a3e969 100644
--- a/python/paddle/fluid/tests/unittests/test_slice_op.py
+++ b/python/paddle/fluid/tests/unittests/test_slice_op.py
@@ -32,6 +32,9 @@ paddle.enable_static()
 class TestSliceOp(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
+        self.enable_cinn = False
         self.config()
         self.inputs = {'Input': self.input}
         self.outputs = {'Out': self.out}
@@ -51,10 +54,12 @@ class TestSliceOp(OpTest):
         self.out = self.input[1:3, 0:3, 2:4, :]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_prim=True)
 
     def test_check_grad_normal(self):
-        self.check_grad(['Input'], 'Out', max_relative_error=0.006)
+        self.check_grad(
+            ['Input'], 'Out', max_relative_error=0.006, check_prim=True
+        )
 
 
 class TestCase1(TestSliceOp):
@@ -69,6 +74,7 @@ class TestCase1(TestSliceOp):
 
 class TestCase2(TestSliceOp):
     def config(self):
+        self.enable_cinn = False
         self.input = np.random.random([3, 4, 5, 6]).astype("float64")
         self.starts = [-3, 0, 2]
         self.ends = [3, 100, -1]
@@ -80,6 +86,8 @@ class TestSliceZerosShapeTensor(OpTest):
     def setUp(self):
         self.op_type = "slice"
self.prim_op_type = "prim" + self.python_api = paddle.slice self.config() self.inputs = {'Input': self.input} self.outputs = {'Out': self.out} @@ -106,7 +114,10 @@ class TestSliceZerosShapeTensor(OpTest): # 1.2 with attr(decrease) class TestSliceOp_decs_dim(OpTest): def setUp(self): + self.enable_cinn = False self.op_type = "slice" + self.prim_op_type = "prim" + self.python_api = paddle.slice self.config() self.inputs = {'Input': self.input} self.outputs = {'Out': self.out} @@ -128,14 +139,17 @@ class TestSliceOp_decs_dim(OpTest): self.out = self.input[1, 0:3, 2:4, :] def test_check_output(self): - self.check_output() + self.check_output(check_prim=True) def test_check_grad_normal(self): - self.check_grad(['Input'], 'Out', max_relative_error=0.006) + self.check_grad( + ['Input'], 'Out', max_relative_error=0.006, check_prim=True + ) class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): def config(self): + self.enable_cinn = False self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [1, 0, 2] self.ends = [2, 1, 4] @@ -147,6 +161,7 @@ class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): def config(self): + self.enable_cinn = False self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-1, 0, 2] self.ends = [1000000, 1, 4] @@ -158,6 +173,7 @@ class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): def config(self): + self.enable_cinn = True self.input = np.random.random([3, 4, 5, 7]).astype("float64") self.starts = [0, 1, 2, 3] self.ends = [1, 2, 3, 4] @@ -169,6 +185,7 @@ class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): def config(self): + self.enable_cinn = False self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-1] self.ends = [1000000] @@ -178,8 +195,10 @@ class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): self.out = self.input[:, :, :, -1] +# test_6 with test_2 with test_3 class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): def config(self): + self.enable_cinn = False self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [0, 1, 2, 3] self.ends = [1, 2, 3, 4] @@ -194,6 +213,9 @@ class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): class TestSliceOp_starts_ListTensor(OpTest): def setUp(self): self.op_type = "slice" + self.prim_op_type = "prim" + self.python_api = paddle.slice + # self.enable_cinn = False self.config() starts_tensor = [] @@ -222,10 +244,12 @@ class TestSliceOp_starts_ListTensor(OpTest): self.starts_infer = [-1, 0, -1] def test_check_output(self): - self.check_output() + self.check_output(check_prim=True) def test_check_grad_normal(self): - self.check_grad(['Input'], 'Out', max_relative_error=0.006) + self.check_grad( + ['Input'], 'Out', max_relative_error=0.006, check_prim=True + ) # Situation 2: starts(list, have tensor), ends(list, no tensor) @@ -233,6 +257,8 @@ class TestSliceOp_starts_ListTensor(OpTest): class TestSliceOp_decs_dim_starts_ListTensor(OpTest): def setUp(self): self.op_type = "slice" + self.prim_op_type = "prim" + self.python_api = paddle.slice self.config() starts_tensor = [] @@ -264,10 +290,12 @@ class TestSliceOp_decs_dim_starts_ListTensor(OpTest): self.starts_infer = [1, -1, 2] def test_check_output(self): - self.check_output() + self.check_output(check_prim=True) def test_check_grad_normal(self): - self.check_grad(['Input'], 'Out', max_relative_error=0.006) + self.check_grad( + ['Input'], 'Out', 
+            ['Input'], 'Out', max_relative_error=0.006, check_prim=True
+        )
 
 
 class TestSliceOp_decs_dim_5_starts_ListTensor(
@@ -290,6 +318,8 @@ class TestSliceOp_decs_dim_5_starts_ListTensor(
 class TestSliceOp_decs_dim_starts_OneTensor(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
         self.inputs = {
             'Input': self.input,
@@ -314,10 +344,12 @@ class TestSliceOp_decs_dim_starts_OneTensor(OpTest):
         self.out = self.input[1, 0:3, 2:4, :]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_prim=True)
 
     def test_check_grad_normal(self):
-        self.check_grad(['Input'], 'Out', max_relative_error=0.006)
+        self.check_grad(
+            ['Input'], 'Out', max_relative_error=0.006, check_prim=True
+        )
 
 
 # Situation 4: starts(tensor), ends(tensor)
@@ -325,6 +357,8 @@ class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
 
         self.inputs = {
@@ -349,10 +383,12 @@ class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest):
         self.out = self.input[1:3, 0:3, 2:4, :]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_prim=True)
 
     def test_check_grad_normal(self):
-        self.check_grad(['Input'], 'Out', max_relative_error=0.006)
+        self.check_grad(
+            ['Input'], 'Out', max_relative_error=0.006, check_prim=True
+        )
 
 
 # Situation 5: starts(tensor), ends(tensor)
@@ -360,6 +396,8 @@ class TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
         self.inputs = {
             'Input': self.input,
@@ -385,10 +423,12 @@ class TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest):
         self.out = self.input[1, 0, 2:4, :]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_prim=True)
 
     def test_check_grad_normal(self):
-        self.check_grad(['Input'], 'Out', max_relative_error=0.006)
+        self.check_grad(
+            ['Input'], 'Out', max_relative_error=0.006, check_prim=True
+        )
 
 
 # Situation 6: starts(tensor), ends(list, have tensor)
@@ -396,6 +436,8 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
 
         ends_tensor = []
@@ -428,10 +470,12 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest):
         self.ends_infer = [-1, 3, 4]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_prim=True)
 
     def test_check_grad_normal(self):
-        self.check_grad(['Input'], 'Out', max_relative_error=0.006)
+        self.check_grad(
+            ['Input'], 'Out', max_relative_error=0.006, check_prim=True
+        )
 
 
 # Test CUDA float16
@@ -440,7 +484,10 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest):
 )
 class TestFP16(OpTest):
     def setUp(self):
+        self.enable_cinn = False
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
         self.inputs = {'Input': self.input}
         self.outputs = {'Out': self.out}
@@ -463,13 +510,17 @@ class TestFP16(OpTest):
     def test_check_output(self):
         place = core.CUDAPlace(0)
         if core.is_float16_supported(place):
-            self.check_output_with_place(place, atol=1e-5)
+            self.check_output_with_place(place, atol=1e-5, check_prim=True)
 
     def test_check_grad_normal(self):
         place = core.CUDAPlace(0)
         if core.is_float16_supported(place):
             self.check_grad_with_place(
-                place, ['Input'], 'Out', max_relative_error=0.006
+                place,
+                ['Input'],
+                'Out',
+                max_relative_error=0.006,
+                check_prim=True,
             )
 
 
@@ -479,6 +531,8 @@ class TestFP16(OpTest):
 class TestFP16_2(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
         self.inputs = {'Input': self.input}
         self.outputs = {'Out': self.out}
@@ -501,7 +555,7 @@ class TestFP16_2(OpTest):
     def test_check_output(self):
         place = core.CUDAPlace(0)
         if core.is_float16_supported(place):
-            self.check_output_with_place(place, atol=1e-5)
+            self.check_output_with_place(place, atol=1e-5, check_prim=True)
 
     def test_check_grad_normal(self):
         place = core.CUDAPlace(0)
@@ -512,12 +566,15 @@ class TestFP16_2(OpTest):
                 'Out',
                 max_relative_error=0.006,
                 numeric_grad_delta=0.5,
+                check_prim=True,
             )
 
 
 class TestBF16(OpTest):
     def setUp(self):
         self.op_type = "slice"
+        self.prim_op_type = "prim"
+        self.python_api = paddle.slice
         self.config()
         self.inputs = {'Input': convert_float_to_uint16(self.input)}
         self.outputs = {'Out': convert_float_to_uint16(self.out)}
diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh
index a065d7db9e..a5867d1c0c 100644
--- a/tools/check_file_diff_approvals.sh
+++ b/tools/check_file_diff_approvals.sh
@@ -212,7 +212,6 @@ for API_FILE in ${API_FILES[*]}; do
     elif [ "${API_FILE}" == "python/paddle/incubate/autograd/primitives.py" ] || [ "${API_FILE}" == "python/paddle/incubate/autograd/composite_rules.py" ]; then
       echo_line="You must have one RD (cyber-pioneer(chenzhuo), JiabinYang) approval for changing ${API_FILE} , which manages the composite rules.\n"
       check_approval 1 cyber-pioneer JiabinYang
-
     else
       echo_line="You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1,qili93) approval for ${API_FILE}, which manages the underlying code for fluid.\n"
       check_approval 1 46782768 12538138 6836917 22561442 6888866 16605440
-- 
GitLab
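
Note: the composite rule in this patch lowers slice_grad to a single pad: the upstream gradient is zero-padded back into the region that the forward slice carved out of the input. A minimal NumPy sketch of that decomposition follows (hypothetical helper name, not the Paddle API; positive steps only, decrease_axis ignored):

import numpy as np

def slice_grad_via_pad(in_shape, out_grad, axes, starts, ends):
    # For each sliced axis, pad out_grad with zeros on both sides so the
    # result has the full input shape; untouched axes get zero padding.
    pad_width = [[0, 0] for _ in in_shape]
    for axis, start, end in zip(axes, starts, ends):
        dim = in_shape[axis]
        # Normalize negative and out-of-range bounds the way slice does.
        start = max(start + dim, 0) if start < 0 else min(start, dim)
        end = max(end + dim, 0) if end < 0 else min(end, dim)
        pad_width[axis] = [start, dim - end]
    return np.pad(out_grad, pad_width, mode="constant", constant_values=0)

x = np.random.rand(3, 4, 5, 6)
dout = np.ones((2, 3, 2, 6))  # gradient of x[1:3, 0:3, 2:4, :]
dx = slice_grad_via_pad(x.shape, dout, [0, 1, 2], [1, 0, 2], [3, 3, 4])
assert dx.shape == x.shape  # zeros outside the slice, dout inside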