diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
index ade4a911071ca1a176fc17d783326f2aefe89265..f5e755ab466915d03d799e565a14107ff2f62f23 100644
--- a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
+++ b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
@@ -112,4 +112,4 @@ TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) {
 }
 
 }  // namespace inference
-}  // namespace paddle
\ No newline at end of file
+}  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cu b/paddle/fluid/operators/elementwise/elementwise_min_op.cu
index a51398640579b966859b328ab7680d62f8f55f57..59f1c51bce266fc85f63d8259635a7b165fa416d 100644
--- a/paddle/fluid/operators/elementwise/elementwise_min_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cu
@@ -41,12 +41,16 @@ namespace ops = paddle::operators;
 
 REGISTER_OP_CUDA_KERNEL(
     elementwise_min,
+    ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext,
+                              paddle::platform::float16>,
     ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, float>,
     ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, int>,
     ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, int64_t>);
 REGISTER_OP_CUDA_KERNEL(
     elementwise_min_grad,
+    ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext,
+                                  paddle::platform::float16>,
     ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, float>,
     ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, int>,
diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.h b/paddle/fluid/operators/elementwise/elementwise_min_op.h
index 2f96ef747708bf5852e2de40fc593f8e913620b7..ebd8f4477d8cf3d3289197d5cc013aa57403df95 100644
--- a/paddle/fluid/operators/elementwise/elementwise_min_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_min_op.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/platform/eigen_ext.h"
+#include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
 namespace operators {
@@ -67,6 +68,28 @@ struct MinGradDy {
   }
 };
 
+#ifdef PADDLE_CUDA_FP16
+template <>
+struct MinGradDx<platform::float16> {
+  HOSTDEVICE platform::float16 operator()(platform::float16 x,
+                                          platform::float16 y,
+                                          platform::float16 out,
+                                          platform::float16 dout) const {
+    return x < y ? dout : static_cast<platform::float16>(0);
+  }
+};
+
+template <>
+struct MinGradDy<platform::float16> {
+  HOSTDEVICE platform::float16 operator()(platform::float16 x,
+                                          platform::float16 y,
+                                          platform::float16 out,
+                                          platform::float16 dout) const {
+    return x >= y ? dout : static_cast<platform::float16>(0);
+  }
+};
+#endif
+
 template <typename DeviceContext, typename T>
 class ElementwiseMinGradKernel : public ElemwiseGradKernel<T> {
  public:
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
index 95e597c703b4e4e004c0d133abfe6966c6df9734..c466cafe1ff3c393f4835988a39fff50044d0b9a 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
@@ -17,6 +17,9 @@ from ... import core
 
 __all__ = ["CustomOpLists", "AutoMixedPrecisionLists"]
 
+# lookup_table fp16 is slower than fp32, though fp16 is supported.
+_extra_unsupported_fp16_list = {'lookup_table', 'lookup_table_v2'}
+
 
 class AutoMixedPrecisionLists(object):
     """
@@ -60,6 +63,8 @@ class AutoMixedPrecisionLists(object):
                 elif op_name in self.gray_list:
                     self.gray_list.remove(op_name)
                 self.white_list.add(op_name)
+                if op_name in _extra_unsupported_fp16_list:
+                    self.unsupported_list.remove(op_name)
         if self._custom_black_list:
             for op_name in self._custom_black_list:
                 if op_name in self.white_list:
@@ -170,7 +175,6 @@ else:
     _, _, _sys_unsupported_fp16_list = core.op_supported_infos(
         'GPU', core.VarDesc.VarType.FP16)
 
-unsupported_fp16_list = {'lookup_table',
-                         'lookup_table_v2'} | _sys_unsupported_fp16_list
+unsupported_fp16_list = _extra_unsupported_fp16_list | _sys_unsupported_fp16_list
 
 CustomOpLists = AutoMixedPrecisionLists
diff --git a/python/paddle/fluid/contrib/tests/test_amp_list.py b/python/paddle/fluid/contrib/tests/test_amp_list.py
new file mode 100644
index 0000000000000000000000000000000000000000..9133a404fa0e26befa5b0bc52ce21db0dbb79718
--- /dev/null
+++ b/python/paddle/fluid/contrib/tests/test_amp_list.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import unittest
+from paddle.fluid.contrib.mixed_precision.fp16_lists import AutoMixedPrecisionLists
+
+
+class TestAMPList(unittest.TestCase):
+    def test_main(self):
+        custom_white_list = [
+            'lookup_table',
+            'lookup_table_v2',
+        ]
+        amp_list = AutoMixedPrecisionLists(custom_white_list=custom_white_list)
+        for op in custom_white_list:
+            self.assertTrue(op in amp_list.white_list)
+            self.assertTrue(op not in amp_list.black_list)
+            self.assertTrue(op not in amp_list.unsupported_list)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py
index 8e17ce32ca8a7d9c4ae5248e751b4e832ef09190..0999acc75acff8567200f80e052d240171430628 100644
--- a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py
+++ b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py
@@ -17,6 +17,11 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest, skip_check_grad_ci
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+
+paddle.enable_static()
 
 
 class TestElementwiseOp(OpTest):
@@ -142,5 +147,54 @@ class TestElementwiseMinOp_broadcast_4(TestElementwiseOp):
         self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])}
 
 
+class TestElementwiseMinOpFP16(unittest.TestCase):
+    def get_out_and_grad(self, x_np, y_np, axis, place, use_fp32=False):
+        assert x_np.dtype == np.float16
+        assert y_np.dtype == np.float16
+        if use_fp32:
+            x_np = x_np.astype(np.float32)
+            y_np = y_np.astype(np.float32)
+        dtype = np.float16
+
+        with fluid.dygraph.guard(place):
+            x = paddle.to_tensor(x_np)
+            y = paddle.to_tensor(y_np)
+            x.stop_gradient = False
+            y.stop_gradient = False
+            z = fluid.layers.elementwise_min(x, y, axis)
+            x_g, y_g = paddle.grad([z], [x, y])
+            return z.numpy().astype(dtype), x_g.numpy().astype(
+                dtype), y_g.numpy().astype(dtype)
+
+    def check_main(self, x_shape, y_shape, axis=-1):
+        if not paddle.is_compiled_with_cuda():
+            return
+        place = paddle.CUDAPlace(0)
+        if not core.is_float16_supported(place):
+            return
+
+        x_np = np.random.random(size=x_shape).astype(np.float16)
+        y_np = np.random.random(size=y_shape).astype(np.float16)
+
+        z_1, x_g_1, y_g_1 = self.get_out_and_grad(x_np, y_np, axis, place,
+                                                  False)
+        z_2, x_g_2, y_g_2 = self.get_out_and_grad(x_np, y_np, axis, place, True)
+        self.assertTrue(np.array_equal(z_1, z_2), "{} vs {}".format(z_1, z_2))
+        self.assertTrue(
+            np.array_equal(x_g_1, x_g_2), "{} vs {}".format(x_g_1, x_g_2))
+        self.assertTrue(
+            np.array_equal(y_g_1, y_g_2), "{} vs {}".format(y_g_1, y_g_2))
+
+    def test_main(self):
+        self.check_main((13, 17), (13, 17))
+        self.check_main((10, 3, 4), (1, ))
+        self.check_main((100, ), (100, ))
+        self.check_main((100, 3, 2), (100, ), 0)
+        self.check_main((2, 100, 3), (100, ), 1)
+        self.check_main((2, 3, 100), (100, ))
+        self.check_main((2, 25, 4, 1), (25, 4), 1)
+        self.check_main((2, 10, 2, 5), (2, 10, 1, 5))
+
+
 if __name__ == '__main__':
     unittest.main()