diff --git a/paddle/phi/kernels/gpu/scale_kernel.cu b/paddle/phi/kernels/gpu/scale_kernel.cu
index 0efcd0b7063f34360f2f5e4e175a6714d3ab6a6e..bb6aca1a1b6375ae99b14d47594f554ee5853b61 100644
--- a/paddle/phi/kernels/gpu/scale_kernel.cu
+++ b/paddle/phi/kernels/gpu/scale_kernel.cu
@@ -21,22 +21,22 @@ limitations under the License. */
 
 namespace phi {
 
-template <typename InT>
+template <typename DataT, typename ParamT>
 struct ScaleFunctor {
-  InT bias;
-  InT scale;
+  ParamT bias;
+  ParamT scale;
   bool bias_after_scale;
 
-  ScaleFunctor(InT scale_data, InT bias_data, bool is_bias_after_sacle)
+  ScaleFunctor(ParamT scale_data, ParamT bias_data, bool is_bias_after_sacle)
       : bias(bias_data),
         scale(scale_data),
         bias_after_scale(is_bias_after_sacle) {}
 
-  __device__ __forceinline__ InT operator()(const InT x) const {
+  __device__ __forceinline__ DataT operator()(const DataT x) const {
     if (bias_after_scale) {
-      return scale * x + bias;
+      return static_cast<DataT>(scale * static_cast<ParamT>(x) + bias);
     } else {
-      return scale * (x + bias);
+      return static_cast<DataT>(scale * (static_cast<ParamT>(x) + bias));
     }
   }
 };
@@ -48,6 +48,7 @@ void ScaleKernel(const Context& dev_ctx,
                  float bias,
                  bool bias_after_scale,
                  DenseTensor* out) {
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   std::vector<const DenseTensor*> inputs;
   std::vector<DenseTensor*> outputs;
   inputs.emplace_back(&x);
@@ -60,7 +61,8 @@ void ScaleKernel(const Context& dev_ctx,
       dev_ctx,
       inputs,
       &outputs,
-      ScaleFunctor<T>(scale.to<T>(), static_cast<T>(bias), bias_after_scale));
+      ScaleFunctor<T, MT>(
+          scale.to<MT>(), static_cast<MT>(bias), bias_after_scale));
 }
 
 }  // namespace phi
diff --git a/python/paddle/fluid/tests/unittests/test_scale_op.py b/python/paddle/fluid/tests/unittests/test_scale_op.py
index f66ff39f1087248784d2bd941e930bc5afc6bf1d..33c6a9af92c3f5154df51122662b3aff10995879 100644
--- a/python/paddle/fluid/tests/unittests/test_scale_op.py
+++ b/python/paddle/fluid/tests/unittests/test_scale_op.py
@@ -149,15 +149,11 @@ class TestScaleFp16Op(TestScaleOp):
 
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        if core.is_float16_supported(place):
-            self.check_output_with_place(place, atol=0.002, check_eager=True)
+        self.check_output_with_place(place, check_eager=True)
 
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        if core.is_float16_supported(place):
-            self.check_grad_with_place(
-                place, ["X"], "Out", max_relative_error=0.05, check_eager=True
-            )
+        self.check_grad_with_place(place, ["X"], "Out", check_eager=True)
 
 
 class TestScaleBF16Op(OpTest):
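
For context: the kernel change above keeps the tensor element type (DataT) only at the load/store boundary and performs the scale/bias arithmetic in a higher-precision parameter type (ParamT, chosen via MPTypeTrait, e.g. float when T is float16), which is why the fp16 test can drop its loosened tolerances. Below is a minimal standalone sketch of that compute-in-higher-precision pattern; the names (ScaleFunctorSketch, scale_kernel_sketch) and the launch code are illustrative assumptions, not Paddle's ElementwiseKernel machinery.

// scale_sketch.cu -- illustrative only; mirrors the patched functor's idea.
#include <cuda_fp16.h>
#include <cstdio>
#include <vector>

// Parameters and arithmetic use ParamT (e.g. float); the element type DataT
// (e.g. __half) is only converted at the input/output boundary.
template <typename DataT, typename ParamT>
struct ScaleFunctorSketch {
  ParamT scale;
  ParamT bias;
  bool bias_after_scale;

  __device__ __forceinline__ DataT operator()(DataT x) const {
    ParamT v = static_cast<ParamT>(x);
    ParamT r = bias_after_scale ? scale * v + bias : scale * (v + bias);
    return static_cast<DataT>(r);
  }
};

// Plain elementwise launch; Paddle instead dispatches through its own
// vectorized ElementwiseKernel helper.
template <typename DataT, typename ParamT>
__global__ void scale_kernel_sketch(const DataT* in,
                                    DataT* out,
                                    int n,
                                    ScaleFunctorSketch<DataT, ParamT> f) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = f(in[i]);
  }
}

int main() {
  const int n = 8;
  std::vector<__half> h_in(n), h_out(n);
  for (int i = 0; i < n; ++i) h_in[i] = __float2half(i * 0.5f);

  __half *d_in = nullptr, *d_out = nullptr;
  cudaMalloc(&d_in, n * sizeof(__half));
  cudaMalloc(&d_out, n * sizeof(__half));
  cudaMemcpy(d_in, h_in.data(), n * sizeof(__half), cudaMemcpyHostToDevice);

  // fp16 data, fp32 parameters and arithmetic: out = 2 * x + 1.
  ScaleFunctorSketch<__half, float> f{2.0f, 1.0f, /*bias_after_scale=*/true};
  scale_kernel_sketch<<<1, 32>>>(d_in, d_out, n, f);

  cudaMemcpy(h_out.data(), d_out, n * sizeof(__half), cudaMemcpyDeviceToHost);
  for (int i = 0; i < n; ++i) printf("%f\n", __half2float(h_out[i]));

  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}

The same accumulate-in-float idea is why the removed atol=0.002 and max_relative_error=0.05 overrides in test_scale_op.py are no longer needed: with the intermediate math in float, the fp16 result fits the default tolerances.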