From 4bf168b2745964077d39483334e6d6bb9d9b8087 Mon Sep 17 00:00:00 2001 From: Kexin Zhao Date: Mon, 19 Mar 2018 17:15:46 -0700 Subject: [PATCH] add fp16 kernel for elementwise add --- paddle/fluid/operators/elementwise_add_op.cu | 21 ++++---- .../unittests/test_elementwise_add_op.py | 54 ++++++++++++++----- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/operators/elementwise_add_op.cu b/paddle/fluid/operators/elementwise_add_op.cu index 19dc4a5215..c8bf524144 100644 --- a/paddle/fluid/operators/elementwise_add_op.cu +++ b/paddle/fluid/operators/elementwise_add_op.cu @@ -14,19 +14,20 @@ limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise_add_op.h" +#include "paddle/fluid/platform/float16.h" namespace ops = paddle::operators; +namespace plat = padddle::platform; REGISTER_OP_CUDA_KERNEL( - elementwise_add, - ops::ElementwiseAddKernel, - ops::ElementwiseAddKernel, - ops::ElementwiseAddKernel, - ops::ElementwiseAddKernel); + elementwise_add, ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel + ops::ElementwiseAddKernel); REGISTER_OP_CUDA_KERNEL( elementwise_add_grad, - ops::ElementwiseAddGradKernel, - ops::ElementwiseAddGradKernel, - ops::ElementwiseAddGradKernel, - ops::ElementwiseAddGradKernel); + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel); diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 5b2384e94d..28286d79ea 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -13,34 +13,60 @@ # limitations under the License. import unittest import numpy as np +import paddle.fluid.core as core from op_test import OpTest -class TestElementwiseOp(OpTest): +class TestElementwiseAddOp(OpTest): def setUp(self): self.op_type = "elementwise_add" + self.dtype = np.float32 + init_dtype() + + x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) self.inputs = { - 'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"), - 'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32") + 'X': OpTest.np_dtype_to_fluid_dtype(x), + 'Y': OpTest.np_dtype_to_fluid_dtype(y) } - self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])} + self.outputs = {'Out': np.add(x, y)} def test_check_output(self): self.check_output() def test_check_grad_normal(self): + if self.dtype == np.float16: + return self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005) def test_check_grad_ingore_x(self): + if self.dtype == np.float16: + return self.check_grad( ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X")) def test_check_grad_ingore_y(self): + if self.dtype == np.float16: + return self.check_grad( ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) + def init_dtype(): + pass + + +class TestFP16ElementwiseAddOp(TestElementwiseAddOp): + def init_dtype(): + self.dtype = np.float16 + + def test_check_output(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + self.check_output_with_place(place, atol=1e-3) + -class TestElementwiseAddOp_scalar(TestElementwiseOp): +class TestElementwiseAddOp_scalar(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -50,7 +76,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseOp): self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} -class TestElementwiseAddOp_scalar2(TestElementwiseOp): +class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -60,7 +86,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseOp): self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} -class TestElementwiseAddOp_Vector(TestElementwiseOp): +class TestElementwiseAddOp_Vector(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -70,7 +96,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseOp): self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])} -class TestElementwiseAddOp_broadcast_0(TestElementwiseOp): +class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -84,7 +110,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseOp): } -class TestElementwiseAddOp_broadcast_1(TestElementwiseOp): +class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -98,7 +124,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseOp): } -class TestElementwiseAddOp_broadcast_2(TestElementwiseOp): +class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -111,7 +137,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseOp): } -class TestElementwiseAddOp_broadcast_3(TestElementwiseOp): +class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -125,7 +151,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseOp): } -class TestElementwiseAddOp_broadcast_4(TestElementwiseOp): +class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -139,7 +165,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseOp): } -class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp): +class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { @@ -153,7 +179,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp): } -class TestElementwiseAddOp_rowwise_add_1(TestElementwiseOp): +class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): def setUp(self): self.op_type = "elementwise_add" self.inputs = { -- GitLab