From b79c6a9b3315742283a923ed52a5934c231beeff Mon Sep 17 00:00:00 2001
From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com>
Date: Fri, 25 Mar 2022 14:13:20 +0800
Subject: [PATCH] add cast_grad phi kernel (#40798)

* add cast_grad phi kernel

* refine unittest

* refine unittest

* refine unittest

* refine include header path

* refine xpu cast unittest

* refine code
---
 paddle/phi/kernels/cast_grad_kernel.h         | 27 ++++++++++
 paddle/phi/kernels/cpu/cast_grad_kernel.cc    | 49 +++++++++++++++++
 paddle/phi/kernels/cpu/cast_impl.h            | 47 ++++++++++++++++
 paddle/phi/kernels/cpu/cast_kernel.cc         | 29 +---------
 paddle/phi/kernels/gpu/cast_grad_kernel.cu    | 53 +++++++++++++++++++
 paddle/phi/kernels/gpu/cast_impl.h            | 42 +++++++++++++++
 paddle/phi/kernels/gpu/cast_kernel.cu         | 34 +-----------
 .../fluid/tests/unittests/test_cast_op.py     |  4 ++
 .../white_list/op_accuracy_white_list.py      |  3 +-
 .../tests/unittests/xpu/test_cast_op_xpu.py   |  1 +
 python/paddle/utils/code_gen/api.yaml         |  1 +
 python/paddle/utils/code_gen/backward.yaml    | 10 ++++
 12 files changed, 239 insertions(+), 61 deletions(-)
 create mode 100644 paddle/phi/kernels/cast_grad_kernel.h
 create mode 100644 paddle/phi/kernels/cpu/cast_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/cast_impl.h
 create mode 100644 paddle/phi/kernels/gpu/cast_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/cast_impl.h

diff --git a/paddle/phi/kernels/cast_grad_kernel.h b/paddle/phi/kernels/cast_grad_kernel.h
new file mode 100644
index 0000000000..6b98bc88de
--- /dev/null
+++ b/paddle/phi/kernels/cast_grad_kernel.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void CastGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out_grad,
+                    DenseTensor* x_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/cpu/cast_grad_kernel.cc b/paddle/phi/kernels/cpu/cast_grad_kernel.cc
new file mode 100644
index 0000000000..c294c743bd
--- /dev/null
+++ b/paddle/phi/kernels/cpu/cast_grad_kernel.cc
@@ -0,0 +1,49 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/phi/kernels/cast_grad_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/cast_impl.h" + +namespace phi { + +template +void CastGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + DenseTensor* x_grad) { + PD_VISIT_ALL_TYPES(x.dtype(), "CastKernelImpl", ([&] { + CastKernelImpl(dev_ctx, out_grad, x_grad); + })); +} + +} // namespace phi + +PD_REGISTER_KERNEL(cast_grad, + CPU, + ALL_LAYOUT, + phi::CastGradKernel, + float, + double, + int, + int64_t, + int16_t, + bool, + uint8_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) { + kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); +} diff --git a/paddle/phi/kernels/cpu/cast_impl.h b/paddle/phi/kernels/cpu/cast_impl.h new file mode 100644 index 0000000000..d39ef24e7b --- /dev/null +++ b/paddle/phi/kernels/cpu/cast_impl.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/platform/transform.h" + +namespace phi { + +template +struct CastOpTransformFunctor { + HOSTDEVICE OutT operator()(InT in) const { return static_cast(in); } +}; + +template +void CastKernelImpl(const CPUContext& dev_ctx, + const DenseTensor& x, + DenseTensor* out) { + auto* in_begin = x.data(); + auto numel = x.numel(); + auto* in_end = in_begin + numel; + + auto* out_begin = dev_ctx.Alloc(out); + + paddle::platform::Transform trans; + trans(dev_ctx, + in_begin, + in_end, + out_begin, + CastOpTransformFunctor()); +} + +} // namespace phi diff --git a/paddle/phi/kernels/cpu/cast_kernel.cc b/paddle/phi/kernels/cpu/cast_kernel.cc index c2c207bfaf..800962544c 100644 --- a/paddle/phi/kernels/cpu/cast_kernel.cc +++ b/paddle/phi/kernels/cpu/cast_kernel.cc @@ -13,39 +13,12 @@ // limitations under the License. #include "paddle/phi/kernels/cast_kernel.h" +#include "paddle/phi/kernels/cpu/cast_impl.h" -#include "paddle/phi/api/ext/dispatch.h" -#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? 
] -#include "paddle/fluid/platform/transform.h" - namespace phi { -template -struct CastOpTransformFunctor { - HOSTDEVICE OutT operator()(InT in) const { return static_cast(in); } -}; - -template -void CastKernelImpl(const CPUContext& dev_ctx, - const DenseTensor& x, - DenseTensor* out) { - auto* in_begin = x.data(); - auto numel = x.numel(); - auto* in_end = in_begin + numel; - - auto* out_begin = dev_ctx.Alloc(out); - - paddle::platform::Transform trans; - trans(dev_ctx, - in_begin, - in_end, - out_begin, - CastOpTransformFunctor()); -} - template void CastKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/gpu/cast_grad_kernel.cu b/paddle/phi/kernels/gpu/cast_grad_kernel.cu new file mode 100644 index 0000000000..1c1d8cf2c0 --- /dev/null +++ b/paddle/phi/kernels/gpu/cast_grad_kernel.cu @@ -0,0 +1,53 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cast_grad_kernel.h" +#include "paddle/phi/kernels/gpu/cast_impl.h" + +namespace phi { + +template +void CastGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + DenseTensor* x_grad) { + PD_VISIT_ALL_TYPES(x.dtype(), "CastCUDAKernelImpl", ([&] { + CastCUDAKernelImpl(dev_ctx, out_grad, x_grad); + })); +} + +} // namespace phi + +#define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \ + PD_REGISTER_KERNEL(cast_grad, \ + GPU, \ + ALL_LAYOUT, \ + phi::CastGradKernel, \ + float, \ + double, \ + int, \ + int64_t, \ + int16_t, \ + bool, \ + uint8_t, \ + phi::dtype::float16, \ + phi::dtype::complex, \ + phi::dtype::complex, \ + ##__VA_ARGS__) { \ + kernel->OutputAt(0).SetDataType( \ + paddle::experimental::DataType::UNDEFINED); \ + } + +PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast_grad, phi::dtype::bfloat16) diff --git a/paddle/phi/kernels/gpu/cast_impl.h b/paddle/phi/kernels/gpu/cast_impl.h new file mode 100644 index 0000000000..8f6351e675 --- /dev/null +++ b/paddle/phi/kernels/gpu/cast_impl.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+#include "paddle/phi/api/ext/dispatch.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+
+namespace phi {
+
+template <typename InT, typename OutT>
+struct CastFuctor {
+  __device__ __forceinline__ OutT operator()(const InT x) const {
+    return static_cast<OutT>(x);
+  }
+};
+
+template <typename InT, typename OutT>
+void CastCUDAKernelImpl(const GPUContext& dev_ctx,
+                        const DenseTensor& x,
+                        DenseTensor* out) {
+  std::vector<const DenseTensor*> inputs;
+  std::vector<DenseTensor*> outputs;
+  inputs.emplace_back(&x);
+  outputs.emplace_back(out);
+  dev_ctx.Alloc<OutT>(out);
+  phi::funcs::ElementwiseKernel<OutT>(
+      dev_ctx, inputs, &outputs, CastFuctor<InT, OutT>());
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/gpu/cast_kernel.cu b/paddle/phi/kernels/gpu/cast_kernel.cu
index 542234c80b..7c4cadbc90 100644
--- a/paddle/phi/kernels/gpu/cast_kernel.cu
+++ b/paddle/phi/kernels/gpu/cast_kernel.cu
@@ -12,42 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/cast_kernel.h"
-
-#include "paddle/phi/api/ext/dispatch.h"
-#include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/funcs/elementwise_base.h"
-
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device/gpu/gpu_helper.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
-#include "paddle/phi/common/bfloat16.h"
-#include "paddle/phi/common/float16.h"
-#include "paddle/phi/kernels/funcs/aligned_vector.h"
+#include "paddle/phi/kernels/cast_kernel.h"
+#include "paddle/phi/kernels/gpu/cast_impl.h"
 
 namespace phi {
 
-template <typename InT, typename OutT>
-struct CastFuctor {
-  __device__ __forceinline__ OutT operator()(const InT x) const {
-    return static_cast<OutT>(x);
-  }
-};
-
-template <typename InT, typename OutT>
-void CastCUDAKernelImpl(const GPUContext& dev_ctx,
-                        const DenseTensor& x,
-                        DenseTensor* out) {
-  std::vector<const DenseTensor*> inputs;
-  std::vector<DenseTensor*> outputs;
-  inputs.emplace_back(&x);
-  outputs.emplace_back(out);
-  dev_ctx.Alloc<OutT>(out);
-  phi::funcs::ElementwiseKernel<OutT>(
-      dev_ctx, inputs, &outputs, CastFuctor<InT, OutT>());
-}
-
 template <typename T, typename Context>
 void CastKernel(const Context& dev_ctx,
                 const DenseTensor& x,
diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py
index 988e7df41a..d80a9dc920 100644
--- a/python/paddle/fluid/tests/unittests/test_cast_op.py
+++ b/python/paddle/fluid/tests/unittests/test_cast_op.py
@@ -52,6 +52,7 @@ class TestCastOpFp16ToFp32(OpTest):
             'out_dtype': int(core.VarDesc.VarType.FP32)
         }
         self.op_type = 'cast'
+        self.__class__.no_need_check_grad = True
 
     def test_check_output(self):
         self.check_output(atol=1e-3)
@@ -67,6 +68,7 @@ class TestCastOpFp32ToFp16(OpTest):
             'out_dtype': int(core.VarDesc.VarType.FP16)
         }
         self.op_type = 'cast'
+        self.__class__.no_need_check_grad = True
 
     def test_check_output(self):
         self.check_output(atol=1e-3)
@@ -82,6 +84,7 @@ class TestCastOpBf16ToFp32(OpTest):
             'out_dtype': int(core.VarDesc.VarType.FP32)
         }
         self.op_type = 'cast'
+        self.__class__.no_need_check_grad = True
 
     def test_check_output(self):
         self.check_output()
@@ -97,6 +100,7 @@ class TestCastOpFp32ToBf16(OpTest):
             'out_dtype': int(core.VarDesc.VarType.BF16)
         }
         self.op_type = 'cast'
+        self.__class__.no_need_check_grad = True
 
     def test_check_output(self):
         self.check_output()
diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
index 581656f6cd..9b57b0d824 100644
--- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
@@ -76,7 +76,8 @@ NO_FP64_CHECK_GRAD_OP_LIST = [
     'trilinear_interp_v2', \
     'var_conv_2d', \
     'warpctc', \
-    'bilateral_slice'
+    'bilateral_slice', \
+    'cast'
 ]
 
 NO_FP16_CHECK_GRAD_OP_LIST = [
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
index 2447408296..08d4810a65 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
@@ -44,6 +44,7 @@ def create_test_class(in_typename, out_typename):
                 'out_dtype': typeid_dict[out_typename],
             }
             self.op_type = 'cast'
+            self.__class__.no_need_check_grad = True
 
         def test_check_output(self):
             if paddle.is_compiled_with_xpu():
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index e1b13ab440..11e8d67ca4 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -16,6 +16,7 @@
     func : cast
     param : [x, out_dtype]
     data_type : x
+  backward : cast_grad
 
 - api : concat
 
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index ff5ebd6ef6..6b597ce44a 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -307,6 +307,16 @@
   kernel :
     func : mv_grad
 
+- backward_api : cast_grad
+  forward : cast (Tensor x, DataType out_dtype) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : cast_grad
+    data_type : out_grad
 
 # =================================== sep0
--
GitLab
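
Editor's note: a quick way to exercise the backward path this patch adds, from the Python side. This is a minimal dygraph sketch, not part of the patch; the tensor values and dtypes are illustrative only.

    import paddle

    # Cast forward to a wider dtype, then backpropagate. CastGradKernel
    # dispatches on x.dtype(), so the incoming float64 out_grad is cast
    # back and x.grad is returned in x's original float32 dtype.
    x = paddle.to_tensor([1.0, 2.0, 3.0], dtype='float32', stop_gradient=False)
    y = paddle.cast(x, 'float64')
    y.sum().backward()
    print(x.grad)  # [1., 1., 1.] with dtype paddle.float32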