未验证 提交 c1e9b1e3 编写于 作者: K Kexin Zhao 提交者: GitHub

Merge pull request #9231 from kexinzhao/elementwise_add_fp16

Add float16 support to Elementwise Add op
...@@ -14,19 +14,20 @@ limitations under the License. */ ...@@ -14,19 +14,20 @@ limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise_add_op.h" #include "paddle/fluid/operators/elementwise_add_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
elementwise_add, elementwise_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, float>, ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, double>, ops::ElementwiseAddKernel<plat::CUDADeviceContext, int>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int>, ops::ElementwiseAddKernel<plat::CUDADeviceContext, int64_t>,
ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int64_t>); ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::float16>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
elementwise_add_grad, elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, float>, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, float>,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, double>, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, double>,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, int>, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int>,
ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int64_t>);
int64_t>);
...@@ -600,7 +600,7 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) { ...@@ -600,7 +600,7 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
// Arithmetic operators for float16 on ARMv8.2-A CPU // Arithmetic operators for float16 on ARMv8.2-A CPU
#elif defined(PADDLE_WITH_NATIVE_FP16) #elif defined(PADDLE_WITH_NATIVE_FP16)
HOST inline float16 operator+(const float16& a, const float16& b) { inline float16 operator+(const float16& a, const float16& b) {
float16 res; float16 res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -616,7 +616,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) { ...@@ -616,7 +616,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) {
return res; return res;
} }
HOST inline float16 operator-(const float16& a, const float16& b) { inline float16 operator-(const float16& a, const float16& b) {
float16 res; float16 res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -632,7 +632,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) { ...@@ -632,7 +632,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) {
return res; return res;
} }
HOST inline float16 operator*(const float16& a, const float16& b) { inline float16 operator*(const float16& a, const float16& b) {
float16 res; float16 res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -648,7 +648,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) { ...@@ -648,7 +648,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) {
return res; return res;
} }
HOST inline float16 operator/(const float16& a, const float16& b) { inline float16 operator/(const float16& a, const float16& b) {
float16 res; float16 res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -664,7 +664,7 @@ HOST inline float16 operator/(const float16& a, const float16& b) { ...@@ -664,7 +664,7 @@ HOST inline float16 operator/(const float16& a, const float16& b) {
return res; return res;
} }
HOST inline float16 operator-(const float16& a) { inline float16 operator-(const float16& a) {
float16 res; float16 res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -679,27 +679,27 @@ HOST inline float16 operator-(const float16& a) { ...@@ -679,27 +679,27 @@ HOST inline float16 operator-(const float16& a) {
return res; return res;
} }
HOST inline float16& operator+=(float16& a, const float16& b) { inline float16& operator+=(float16& a, const float16& b) {
a = a + b; a = a + b;
return a; return a;
} }
HOST inline float16& operator-=(float16& a, const float16& b) { inline float16& operator-=(float16& a, const float16& b) {
a = a - b; a = a - b;
return a; return a;
} }
HOST inline float16& operator*=(float16& a, const float16& b) { inline float16& operator*=(float16& a, const float16& b) {
a = a * b; a = a * b;
return a; return a;
} }
HOST inline float16& operator/=(float16& a, const float16& b) { inline float16& operator/=(float16& a, const float16& b) {
a = a / b; a = a / b;
return a; return a;
} }
HOST inline bool operator==(const float16& a, const float16& b) { inline bool operator==(const float16& a, const float16& b) {
uint16_t res; uint16_t res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -715,11 +715,9 @@ HOST inline bool operator==(const float16& a, const float16& b) { ...@@ -715,11 +715,9 @@ HOST inline bool operator==(const float16& a, const float16& b) {
return (res & 0xffff) != 0; return (res & 0xffff) != 0;
} }
HOST inline bool operator!=(const float16& a, const float16& b) { inline bool operator!=(const float16& a, const float16& b) { return !(a == b); }
return !(a == b);
}
HOST inline bool operator<(const float16& a, const float16& b) { inline bool operator<(const float16& a, const float16& b) {
uint16_t res; uint16_t res;
asm volatile( asm volatile(
"ld1 {v1.h}[0], [%[a_ptr]]\n" "ld1 {v1.h}[0], [%[a_ptr]]\n"
...@@ -735,7 +733,7 @@ HOST inline bool operator<(const float16& a, const float16& b) { ...@@ -735,7 +733,7 @@ HOST inline bool operator<(const float16& a, const float16& b) {
return (res & 0xffff) != 0; return (res & 0xffff) != 0;
} }
HOST inline bool operator<=(const float16& a, const float16& b) { inline bool operator<=(const float16& a, const float16& b) {
uint16_t res; uint16_t res;
asm volatile( asm volatile(
"ld1 {v1.h}[0], [%[a_ptr]]\n" "ld1 {v1.h}[0], [%[a_ptr]]\n"
...@@ -751,7 +749,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) { ...@@ -751,7 +749,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) {
return (res & 0xffff) != 0; return (res & 0xffff) != 0;
} }
HOST inline bool operator>(const float16& a, const float16& b) { inline bool operator>(const float16& a, const float16& b) {
uint16_t res; uint16_t res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -767,7 +765,7 @@ HOST inline bool operator>(const float16& a, const float16& b) { ...@@ -767,7 +765,7 @@ HOST inline bool operator>(const float16& a, const float16& b) {
return (res & 0xffff) != 0; return (res & 0xffff) != 0;
} }
HOST inline bool operator>=(const float16& a, const float16& b) { inline bool operator>=(const float16& a, const float16& b) {
uint16_t res; uint16_t res;
asm volatile( asm volatile(
"ld1 {v0.h}[0], [%[a_ptr]]\n" "ld1 {v0.h}[0], [%[a_ptr]]\n"
...@@ -785,69 +783,69 @@ HOST inline bool operator>=(const float16& a, const float16& b) { ...@@ -785,69 +783,69 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
// Arithmetic operators for float16, software emulated on other CPU // Arithmetic operators for float16, software emulated on other CPU
#else #else
HOST inline float16 operator+(const float16& a, const float16& b) { inline float16 operator+(const float16& a, const float16& b) {
return float16(float(a) + float(b)); return float16(float(a) + float(b));
} }
HOST inline float16 operator-(const float16& a, const float16& b) { inline float16 operator-(const float16& a, const float16& b) {
return float16(float(a) - float(b)); return float16(float(a) - float(b));
} }
HOST inline float16 operator*(const float16& a, const float16& b) { inline float16 operator*(const float16& a, const float16& b) {
return float16(float(a) * float(b)); return float16(float(a) * float(b));
} }
HOST inline float16 operator/(const float16& a, const float16& b) { inline float16 operator/(const float16& a, const float16& b) {
return float16(float(a) / float(b)); return float16(float(a) / float(b));
} }
HOST inline float16 operator-(const float16& a) { inline float16 operator-(const float16& a) {
float16 res; float16 res;
res.x = a.x ^ 0x8000; res.x = a.x ^ 0x8000;
return res; return res;
} }
HOST inline float16& operator+=(float16& a, const float16& b) { inline float16& operator+=(float16& a, const float16& b) {
a = float16(float(a) + float(b)); a = float16(float(a) + float(b));
return a; return a;
} }
HOST inline float16& operator-=(float16& a, const float16& b) { inline float16& operator-=(float16& a, const float16& b) {
a = float16(float(a) - float(b)); a = float16(float(a) - float(b));
return a; return a;
} }
HOST inline float16& operator*=(float16& a, const float16& b) { inline float16& operator*=(float16& a, const float16& b) {
a = float16(float(a) * float(b)); a = float16(float(a) * float(b));
return a; return a;
} }
HOST inline float16& operator/=(float16& a, const float16& b) { inline float16& operator/=(float16& a, const float16& b) {
a = float16(float(a) / float(b)); a = float16(float(a) / float(b));
return a; return a;
} }
HOST inline bool operator==(const float16& a, const float16& b) { inline bool operator==(const float16& a, const float16& b) {
return float(a) == float(b); return float(a) == float(b);
} }
HOST inline bool operator!=(const float16& a, const float16& b) { inline bool operator!=(const float16& a, const float16& b) {
return float(a) != float(b); return float(a) != float(b);
} }
HOST inline bool operator<(const float16& a, const float16& b) { inline bool operator<(const float16& a, const float16& b) {
return float(a) < float(b); return float(a) < float(b);
} }
HOST inline bool operator<=(const float16& a, const float16& b) { inline bool operator<=(const float16& a, const float16& b) {
return float(a) <= float(b); return float(a) <= float(b);
} }
HOST inline bool operator>(const float16& a, const float16& b) { inline bool operator>(const float16& a, const float16& b) {
return float(a) > float(b); return float(a) > float(b);
} }
HOST inline bool operator>=(const float16& a, const float16& b) { inline bool operator>=(const float16& a, const float16& b) {
return float(a) >= float(b); return float(a) >= float(b);
} }
#endif #endif
......
...@@ -13,158 +13,243 @@ ...@@ -13,158 +13,243 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import numpy as np import numpy as np
import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
class TestElementwiseOp(OpTest): class TestElementwiseAddOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.dtype = np.float32
self.axis = -1
self.init_dtype()
self.init_input_output()
self.init_axis()
self.inputs = { self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"), 'X': OpTest.np_dtype_to_fluid_dtype(self.x),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32") 'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
} }
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])} self.attrs = {'axis': self.axis}
self.outputs = {'Out': self.out}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
def test_check_grad_normal(self): def test_check_grad_normal(self):
if self.dtype == np.float16:
return
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005) self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
def test_check_grad_ingore_x(self): def test_check_grad_ingore_x(self):
if self.dtype == np.float16:
return
self.check_grad( self.check_grad(
['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X")) ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
def test_check_grad_ingore_y(self): def test_check_grad_ingore_y(self):
if self.dtype == np.float16:
return
self.check_grad( self.check_grad(
['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.add(self.x, self.y)
class TestElementwiseAddOp_scalar(TestElementwiseOp): def init_dtype(self):
def setUp(self): pass
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
def init_axis(self):
pass
class TestElementwiseAddOp_scalar2(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(1, 1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
class TestFP16ElementwiseAddOp(TestElementwiseAddOp):
def init_dtype(self):
self.dtype = np.float16
class TestElementwiseAddOp_Vector(TestElementwiseOp): def test_check_output(self):
def setUp(self): if core.is_compiled_with_cuda():
self.op_type = "elementwise_add" place = core.CUDAPlace(0)
self.inputs = { if core.is_float16_supported(place):
'X': np.random.random((32, )).astype("float32"), self.check_output_with_place(place, atol=1e-3)
'Y': np.random.random((32, )).astype("float32")
}
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])}
class TestElementwiseAddOp_broadcast_0(TestElementwiseOp): class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
def setUp(self): def init_input_output(self):
self.op_type = "elementwise_add" self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.inputs = { self.y = np.random.rand(1).astype(self.dtype)
'X': np.random.rand(2, 3, 4).astype(np.float32), self.out = self.x + self.y
'Y': np.random.rand(2).astype(np.float32)
}
self.attrs = {'axis': 0}
self.outputs = {
'Out': self.inputs['X'] + self.inputs['Y'].reshape(2, 1, 1)
}
class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp):
    """elementwise_add case: a (2, 3, 4) X plus a one-element Y.

    The dtype is whatever the parent class stored in ``self.dtype``.
    """

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4), (1, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # NumPy broadcasting spreads the single Y value over all of X.
        self.out = np.add(self.x, self.y)
class TestElementwiseAddOp_broadcast_1(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(3).astype(np.float32)
}
self.attrs = {'axis': 1} class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
self.outputs = { def init_input_output(self):
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 3, 1) self.x = np.random.rand(2, 3, 4).astype(self.dtype)
} self.y = np.random.rand(1, 1).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_2(TestElementwiseOp): class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp):
def setUp(self): def init_input_output(self):
self.op_type = "elementwise_add" self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.inputs = { self.y = np.random.rand(1, 1).astype(self.dtype)
'X': np.random.rand(2, 3, 4).astype(np.float32), self.out = self.x + self.y
'Y': np.random.rand(4).astype(np.float32)
}
self.outputs = {
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 1, 4)
}
class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
    """elementwise_add case: two 1-D inputs of length 32, no broadcasting."""

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        vec_shape = (32, )
        self.x = np.random.random(vec_shape).astype(self.dtype)
        self.y = np.random.random(vec_shape).astype(self.dtype)
        # Both operands share one shape, so the reference is a plain sum.
        self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_3(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4, 5).astype(np.float32),
'Y': np.random.rand(3, 4).astype(np.float32)
}
self.attrs = {'axis': 1} class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp):
self.outputs = { def init_input_output(self):
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 3, 4, 1) self.x = np.random.random((32, )).astype(self.dtype)
} self.y = np.random.random((32, )).astype(self.dtype)
self.out = np.add(self.x, self.y)
class TestElementwiseAddOp_broadcast_4(TestElementwiseOp): class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
def setUp(self): def init_input_output(self):
self.op_type = "elementwise_add" self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.inputs = { self.y = np.random.rand(2).astype(self.dtype)
'X': np.random.rand(2, 3, 4, 5).astype(np.float32), self.out = self.x + self.y.reshape(2, 1, 1)
'Y': np.random.rand(2, 1).astype(np.float32)
}
self.attrs = {'axis': 0} def init_axis(self):
self.outputs = { self.axis = 0
'Out': self.inputs['X'] + self.inputs['Y'].reshape(2, 1, 1, 1)
}
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp): class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp):
def setUp(self): def init_input_output(self):
self.op_type = "elementwise_add" self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.inputs = { self.y = np.random.rand(2).astype(self.dtype)
'X': np.random.rand(2, 3, 4).astype(np.float32), self.out = self.x + self.y.reshape(2, 1, 1)
'Y': np.random.rand(3, 4).astype(np.float32)
}
self.attrs = {'axis': 1} def init_axis(self):
self.outputs = { self.axis = 0
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 3, 4)
}
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseOp): class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
def setUp(self): def init_input_output(self):
self.op_type = "elementwise_add" self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.inputs = { self.y = np.random.rand(3).astype(self.dtype)
'X': np.random.rand(2, 1).astype(np.float32), self.out = self.x + self.y.reshape(1, 3, 1)
'Y': np.random.rand(1).astype(np.float32)
}
self.attrs = {'axis': 1} def init_axis(self):
self.outputs = { self.axis = 1
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 1)
}
class TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp):
    """elementwise_add case: Y of shape (3,) against a (2, 3, 4) X, axis=1.

    The dtype is whatever the parent class stored in ``self.dtype``.
    """

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4), (3, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 3, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 3, 1))
class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
    """elementwise_add case: Y of shape (4,) against a (2, 3, 4) X.

    ``init_axis`` is not overridden, so ``self.axis`` keeps the value the
    parent ``setUp`` assigned.
    """

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4), (4, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 1, 4) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 1, 4))
class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp):
    """elementwise_add case: Y of shape (4,) against a (2, 3, 4) X.

    The dtype is whatever the parent class stored in ``self.dtype``;
    ``init_axis`` is not overridden here.
    """

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4), (4, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 1, 4) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 1, 4))
class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
    """elementwise_add case: Y of shape (3, 4) against a (2, 3, 4, 5) X, axis=1."""

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4, 5), (3, 4)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 3, 4, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 3, 4, 1))
class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp):
    """elementwise_add case: Y of shape (3, 4) against a (2, 3, 4, 5) X, axis=1.

    The dtype is whatever the parent class stored in ``self.dtype``.
    """

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4, 5), (3, 4)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 3, 4, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 3, 4, 1))
class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
    """elementwise_add case: Y of shape (2, 1) against a (2, 3, 4, 5) X, axis=0."""

    def init_axis(self):
        """Set the op's ``axis`` attribute to 0."""
        self.axis = 0

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4, 5), (2, 1)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (2, 1, 1, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(2, 1, 1, 1))
class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp):
    """elementwise_add case: Y of shape (2, 1) against a (2, 3, 4, 5) X, axis=0.

    The dtype is whatever the parent class stored in ``self.dtype``.
    """

    def init_axis(self):
        """Set the op's ``axis`` attribute to 0."""
        self.axis = 0

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4, 5), (2, 1)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (2, 1, 1, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(2, 1, 1, 1))
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
    """elementwise_add case: Y of shape (3, 4) against a (2, 3, 4) X, axis=1."""

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4), (3, 4)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 3, 4) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 3, 4))
class TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp):
    """elementwise_add case: Y of shape (3, 4) against a (2, 3, 4) X, axis=1.

    The dtype is whatever the parent class stored in ``self.dtype``.
    """

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 3, 4), (3, 4)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 3, 4) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 3, 4))
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
    """elementwise_add case: Y of shape (1,) against a (2, 1) X, axis=1."""

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 1), (1, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 1))
class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp):
    """elementwise_add case: Y of shape (1,) against a (2, 1) X, axis=1.

    The dtype is whatever the parent class stored in ``self.dtype``.
    """

    def init_axis(self):
        """Set the op's ``axis`` attribute to 1."""
        self.axis = 1

    def init_input_output(self):
        """Create the random inputs and the NumPy reference output."""
        x_shape, y_shape = (2, 1), (1, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference: view Y as (1, 1) and let NumPy broadcast it over X.
        self.out = np.add(self.x, self.y.reshape(1, 1))
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册