提交 4bf168b2 编写于 作者: K Kexin Zhao

add fp16 kernel for elementwise add

上级 b3f076a6
...@@ -14,19 +14,20 @@ limitations under the License. */ ...@@ -14,19 +14,20 @@ limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise_add_op.h" #include "paddle/fluid/operators/elementwise_add_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = padddle::platform;
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
elementwise_add, elementwise_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, float>, ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, double>, ops::ElementwiseAddKernel<plat::CUDADeviceContext, int>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int>, ops::ElementwiseAddKernel<plat::CUDADeviceContext, int64_t>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int64_t>); ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::float16>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
elementwise_add_grad, elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, float>, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, float>,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, double>, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, double>,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, int>, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int>,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int64_t>);
int64_t>);
...@@ -13,34 +13,60 @@ ...@@ -13,34 +13,60 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import numpy as np import numpy as np
import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
class TestElementwiseOp(OpTest): class TestElementwiseAddOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.dtype = np.float32
self.init_dtype()
x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.inputs = { self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"), 'X': OpTest.np_dtype_to_fluid_dtype(x),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32") 'Y': OpTest.np_dtype_to_fluid_dtype(y)
} }
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])} self.outputs = {'Out': np.add(x, y)}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
def test_check_grad_normal(self): def test_check_grad_normal(self):
if self.dtype == np.float16:
return
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005) self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
def test_check_grad_ingore_x(self): def test_check_grad_ingore_x(self):
if self.dtype == np.float16:
return
self.check_grad( self.check_grad(
['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X")) ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
def test_check_grad_ingore_y(self): def test_check_grad_ingore_y(self):
if self.dtype == np.float16:
return
self.check_grad( self.check_grad(
['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
def init_dtype(self):
    """Hook for subclasses to override ``self.dtype``; a no-op by default."""
    # Declared with ``self`` so it can be invoked as an instance method and
    # overridden by subclasses that assign ``self.dtype``.
    pass
class TestFP16ElementwiseAddOp(TestElementwiseAddOp):
    """Variant of the elementwise_add test that sets the dtype to float16."""

    def init_dtype(self):
        # Must take ``self``: the hook's whole job is to set an instance
        # attribute, which is impossible without a bound instance.
        self.dtype = np.float16

    def test_check_output(self):
        # The output is only checked when the build has CUDA and device 0
        # reports float16 support; in every other case this test does nothing.
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
            if core.is_float16_supported(place):
                self.check_output_with_place(place, atol=1e-3)
class TestElementwiseAddOp_scalar(TestElementwiseOp): class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -50,7 +76,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseOp): ...@@ -50,7 +76,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseOp):
self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
class TestElementwiseAddOp_scalar2(TestElementwiseOp): class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -60,7 +86,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseOp): ...@@ -60,7 +86,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseOp):
self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
class TestElementwiseAddOp_Vector(TestElementwiseOp): class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -70,7 +96,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseOp): ...@@ -70,7 +96,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseOp):
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])} self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])}
class TestElementwiseAddOp_broadcast_0(TestElementwiseOp): class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -84,7 +110,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseOp): ...@@ -84,7 +110,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseOp):
} }
class TestElementwiseAddOp_broadcast_1(TestElementwiseOp): class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -98,7 +124,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseOp): ...@@ -98,7 +124,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseOp):
} }
class TestElementwiseAddOp_broadcast_2(TestElementwiseOp): class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -111,7 +137,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseOp): ...@@ -111,7 +137,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseOp):
} }
class TestElementwiseAddOp_broadcast_3(TestElementwiseOp): class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -125,7 +151,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseOp): ...@@ -125,7 +151,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseOp):
} }
class TestElementwiseAddOp_broadcast_4(TestElementwiseOp): class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -139,7 +165,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseOp): ...@@ -139,7 +165,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseOp):
} }
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp): class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
...@@ -153,7 +179,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp): ...@@ -153,7 +179,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp):
} }
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseOp): class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.inputs = { self.inputs = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册