Unverified commit 276017bb, authored by zhangyikun02, committed by GitHub

conv2d support FP16 on xpu and update unittest for conv2d, test=kunlun (#40395)

Parent 1eb96eec
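The change can be exercised end to end from Python. The snippet below is a minimal illustrative sketch, not part of this commit; it assumes a Paddle build with XPU (Kunlun) support and a device reachable as paddle.XPUPlace(0).

# Illustrative only: run a small conv2d in float16 on an XPU device through the
# static-graph API, which dispatches to the GemmConvXPUKernel registered below.
import numpy as np
import paddle

paddle.enable_static()
place = paddle.XPUPlace(0)  # assumes a Kunlun/XPU device is installed

main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.static.data(name='x', shape=[2, 3, 5, 5], dtype='float16')
    y = paddle.static.nn.conv2d(input=x, num_filters=6, filter_size=3)

exe = paddle.static.Executor(place)
exe.run(startup)
out, = exe.run(main,
               feed={'x': np.random.random([2, 3, 5, 5]).astype('float16')},
               fetch_list=[y])
print(out.shape)  # (2, 6, 3, 3): 5x5 input, 3x3 filter, stride 1, no padding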
@@ -19,14 +19,16 @@ namespace operators {
template <typename DeviceContext, typename T>
class GemmConvXPUKernel : public framework::OpKernel<T> {
  using XPUT = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext &context) const override {
    const Tensor *input = context.Input<Tensor>("Input");
    // The filter will be reshaped in the calculations,
    // so here use an assignment operation,
    // that avoids modifying the variable in the Scope.
    Tensor filter = *context.Input<Tensor>("Filter");
    Tensor *output = context.Output<Tensor>("Output");
    output->mutable_data<T>(context.GetPlace());
    int groups = context.Attr<int>("groups");
    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
@@ -53,11 +55,16 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
    const int img_h = static_cast<int>(input->dims()[2]);
    const int img_w = static_cast<int>(input->dims()[3]);
    const int f = static_cast<int>(filter.dims()[0]);

    const XPUT *input_data = reinterpret_cast<const XPUT *>(input->data<T>());
    const XPUT *filter_data = reinterpret_cast<const XPUT *>(filter.data<T>());
    XPUT *output_data = reinterpret_cast<XPUT *>(output->data<T>());

    auto &dev_ctx = context.template device_context<DeviceContext>();
    int r = xpu::conv2d<XPUT, XPUT, XPUT, int16_t>(
        dev_ctx.x_context(), input_data, filter_data, output_data, batch_size,
        img_c, img_h, img_w, f, ksize, strides, paddings, dilations, groups,
        nullptr, nullptr, nullptr, true);
    PADDLE_ENFORCE_EQ(
        r, XPU_SUCCESS,
        platform::errors::External("XPU conv kernel return wrong value[%d %s]",
@@ -67,14 +74,16 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
template <typename DeviceContext, typename T>
class GemmConvGradXPUKernel : public framework::OpKernel<T> {
  using XPUT = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext &context) const override {
    const Tensor *input = context.Input<Tensor>("Input");
    const Tensor *output_grad =
        context.Input<Tensor>(framework::GradVarName("Output"));
    Tensor *input_grad =
        context.Output<Tensor>(framework::GradVarName("Input"));
    Tensor *filter_grad =
        context.Output<Tensor>(framework::GradVarName("Filter"));
    // The filter and filter_grad will be reshaped in the calculations,
    // so here use an assignment operation,
@@ -107,19 +116,27 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
    const int img_h = static_cast<int>(input->dims()[2]);
    const int img_w = static_cast<int>(input->dims()[3]);
    const int f = static_cast<int>(filter.dims()[0]);

    const XPUT *input_data = reinterpret_cast<const XPUT *>(input->data<T>());
    const XPUT *filter_data = reinterpret_cast<const XPUT *>(filter.data<T>());
    const XPUT *output_grad_data =
        reinterpret_cast<const XPUT *>(output_grad->data<T>());
    XPUT *input_grad_data = nullptr;
    if (input_grad) {
      input_grad->mutable_data<T>(context.GetPlace());
      input_grad_data = reinterpret_cast<XPUT *>(input_grad->data<T>());
    }
    XPUT *filter_grad_data = nullptr;
    if (filter_grad) {
      filter_grad->mutable_data<T>(context.GetPlace());
      filter_grad_data = reinterpret_cast<XPUT *>(filter_grad->data<T>());
    }

    auto &dev_ctx = context.template device_context<DeviceContext>();
    int r = xpu::conv2d_grad<XPUT, XPUT, XPUT, int16_t>(
        dev_ctx.x_context(), input_data, filter_data, output_grad_data,
        input_grad_data, filter_grad_data, batch_size, img_c, img_h, img_w, f,
        ksize, strides, paddings, dilations, groups, nullptr, nullptr, nullptr,
        nullptr, nullptr, true);
    PADDLE_ENFORCE_EQ(
        r, XPU_SUCCESS,
        platform::errors::External("XPU conv kernel return wrong value[%d %s]",
@@ -130,14 +147,22 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
    conv2d, ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext,
                           paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext,
                               paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext,
                           paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext,
                               paddle::platform::float16>);
#endif
@@ -51,16 +51,20 @@ XPUOpMap& get_kl2_ops() {
      {"clip", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                                    pOpKernelType(vartype::FP16, XPUPlace())})},
      {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                               pOpKernelType(vartype::FP16, XPUPlace())})},
      {"conv2d_transpose_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"conv2d_transpose",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"depthwise_conv2d_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                     pOpKernelType(vartype::FP16, XPUPlace())})},
      {"depthwise_conv2d",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                     pOpKernelType(vartype::FP16, XPUPlace())})},
      {"dropout_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
......
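The KL2 op list above is what the XPU test utilities consult. Below is a small sketch, not part of the commit, of how the new FP16 entries are expected to surface on the Python side; it assumes the script is run from the unittests directory where the xpu helper package lives, and the exact returned strings are an assumption.

# Sketch: after this change, the support list for conv2d on KL2 should include
# a float16 entry alongside float32.
from xpu.get_test_cover_info import get_xpu_op_support_types

print(get_xpu_op_support_types('conv2d'))            # e.g. ['float32', 'float16']
print(get_xpu_op_support_types('depthwise_conv2d'))  # e.g. ['float32', 'float16']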
@@ -23,6 +23,7 @@ import paddle.fluid as fluid
from op_test_xpu import XPUOpTest
import paddle
from paddle.fluid import Program, program_guard
from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper


def conv2d_forward_naive(input,
@@ -159,320 +160,334 @@ def create_test_padding_VALID_class(parent):
    globals()[cls_name] = TestPaddingVALIDCase


class XPUTestConv2DOp(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'conv2d'
        self.use_dynamic_create_class = False

    class TestConv2DOp(XPUOpTest):
        def setUp(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.op_type = "conv2d"
            self.use_cudnn = False
            self.exhaustive_search = False
            self.use_cuda = False
            self.use_mkldnn = False
            self.fuse_relu_before_depthwise_conv = False
            self.data_format = "AnyLayout"
            self.init_kernel_type()
            self.init_group()
            self.init_dilation()
            self.init_test_case()

            conv2d_param = {
                'stride': self.stride,
                'pad': self.pad,
                'dilation': self.dilations
            }

            np.random.seed(100)
            input = np.random.random(self.input_size).astype(self.dtype)
            if not self.has_cuda():
                self.fuse_relu_before_depthwise_conv = False
            if self.fuse_relu_before_depthwise_conv:
                input = input - 0.5
                input -= (input < 0) * 0.1
                input += (input >= 0) * 0.1
                input2 = np.maximum(input, 0.0)
            else:
                input2 = input
            np.random.seed(1)
            filter = np.random.uniform(-1, 1,
                                       self.filter_size).astype(self.dtype)

            output, _, _, _, _ = conv2d_forward_naive(input2, filter,
                                                      self.groups, conv2d_param)
            output = output.astype(self.dtype)

            self.inputs = {
                'Input': XPUOpTest.np_dtype_to_fluid_dtype(input),
                'Filter': XPUOpTest.np_dtype_to_fluid_dtype(filter)
            }
            self.attrs = {
                'strides': self.stride,
                'paddings': self.pad,
                'groups': self.groups,
                'dilations': self.dilations,
                'use_cudnn': self.use_cudnn,
                'use_mkldnn': self.use_mkldnn,
                'data_format': self.data_format,
                'fuse_relu_before_depthwise_conv':
                self.fuse_relu_before_depthwise_conv,
                'exhaustive_search': self.exhaustive_search
            }
            self.outputs = {'Output': output}

        def has_cuda(self):
            return core.is_compiled_with_cuda() and (self.use_cudnn or
                                                     self.use_cuda)

        def test_check_output(self):
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_output_with_place(self.place)

        def test_check_grad(self):
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(self.place, {'Input', 'Filter'},
                                           'Output')

        def test_check_grad_no_filter(self):
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Input'],
                    'Output',
                    no_grad_set=set(['Filter']))

        def test_check_grad_no_input(self):
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Filter'],
                    'Output',
                    no_grad_set=set(['Input']))

        def init_test_case(self):
            self.pad = [0, 0]
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

        def init_test_case_2(self):
            pass

        def init_dilation(self):
            self.dilations = [1, 1]

        def init_group(self):
            self.groups = 1

        def init_kernel_type(self):
            pass

    class TestWithPad(TestConv2DOp):
        def init_test_case(self):
            self.pad = [1, 1]
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

    class TestWithStride(TestConv2DOp):
        def init_test_case(self):
            self.pad = [1, 1]
            self.stride = [2, 2]
            self.input_size = [2, 3, 6, 6]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

    class TestWith1x1(TestConv2DOp):
        def init_test_case(self):
            self.pad = [0, 0]
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [120, f_c, 1, 1]

        def init_group(self):
            self.groups = 1


# ---- test asymmetric padding ----
class XPUTestConv2DOp_v2(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'conv2d'
        self.use_dynamic_create_class = False

    class TestConv2DOp_v2(XPUOpTest):
        def setUp(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.op_type = "conv2d"
            self.use_cudnn = False
            self.exhaustive_search = False
            self.use_cuda = False
            self.use_mkldnn = False
            self.fuse_relu_before_depthwise_conv = False
            self.init_kernel_type()
            self.init_group()
            self.init_dilation()
            self.init_data_format()
            self.init_test_case()
            self.init_paddings()
            self.init_test_case_2()

            conv2d_param = {
                'stride': self.stride,
                'pad': self.pad,
                'dilation': self.dilations
            }

            np.random.seed(100)
            input = np.random.random(self.input_size).astype(self.dtype)
            if not self.has_cuda():
                self.fuse_relu_before_depthwise_conv = False
            if self.fuse_relu_before_depthwise_conv:
                input = input - 0.5
                input -= (input < 0) * 0.1
                input += (input >= 0) * 0.1
                input2 = np.maximum(input, 0.0)
            else:
                input2 = input
            np.random.seed(8)
            filter = np.random.uniform(-1, 1,
                                       self.filter_size).astype(self.dtype)
            output, _, _, _, _ = conv2d_forward_naive(
                input2, filter, self.groups, conv2d_param,
                self.padding_algorithm, self.data_format)
            output = output.astype(self.dtype)

            self.inputs = {
                'Input': XPUOpTest.np_dtype_to_fluid_dtype(input),
                'Filter': XPUOpTest.np_dtype_to_fluid_dtype(filter)
            }
            self.attrs = {
                'strides': self.stride,
                'paddings': self.pad,
                'padding_algorithm': self.padding_algorithm,
                'groups': self.groups,
                'dilations': self.dilations,
                'use_cudnn': self.use_cudnn,
                'use_mkldnn': self.use_mkldnn,
                'data_format': self.data_format,
                'fuse_relu_before_depthwise_conv':
                self.fuse_relu_before_depthwise_conv,
                'exhaustive_search': self.exhaustive_search
            }
            self.outputs = {'Output': output}

        def has_cuda(self):
            return core.is_compiled_with_cuda() and (self.use_cudnn or
                                                     self.use_cuda)

        def test_check_output(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_output_with_place(place=self.place)

        def test_check_grad(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(self.place, {'Input', 'Filter'},
                                           'Output')

        def test_check_grad_no_filter(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Input'],
                    'Output',
                    no_grad_set=set(['Filter']))

        def test_check_grad_no_input(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Filter'],
                    'Output',
                    no_grad_set=set(['Input']))

        def init_test_case(self):
            self.pad = [0, 0]
            self.stride = [1, 2]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 4, 3]

        def init_dilation(self):
            self.dilations = [1, 1]

        def init_group(self):
            self.groups = 1

        def init_kernel_type(self):
            pass

        def init_paddings(self):
            self.pad = [0, 0]
            self.padding_algorithm = "EXPLICIT"

        def init_data_format(self):
            self.data_format = "NCHW"

        def init_test_case_2(self):
            pass

    class TestConv2DOp_AsyPadding(TestConv2DOp_v2):
        def init_paddings(self):
            self.pad = [0, 0, 0, 0]
            self.padding_algorithm = "EXPLICIT"

    class TestWithPad_AsyPadding(TestConv2DOp_v2):
        def init_test_case(self):
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

        def init_paddings(self):
            self.pad = [1, 1, 1, 1]
            self.padding_algorithm = "EXPLICIT"

    class TestWithStride_AsyPadding(TestConv2DOp_v2):
        def init_test_case(self):
            self.stride = [2, 2]
            self.input_size = [2, 3, 6, 6]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

        def init_paddings(self):
            self.pad = [1, 1, 1, 1]
            self.padding_algorithm = "EXPLICIT"


support_types = get_xpu_op_support_types('conv2d')
for stype in support_types:
    create_test_class(globals(), XPUTestConv2DOp, stype)
    create_test_class(globals(), XPUTestConv2DOp_v2, stype)

#---------- test SAME VALID -----------
#create_test_padding_SAME_class(TestConv2DOp_AsyPadding)
......
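For reference, a sketch of what the create_test_class loop above is expected to produce for the FP16 type; the generated class name and wiring are assumptions based on the wrapper pattern, not code from this commit.

# Hypothetical equivalent of one generated case: the inner TestConv2DOp is
# cloned with in_type set to float16 and registered under a dtype-suffixed
# name so unittest discovery picks it up alongside the float32 variant.
import numpy as np
import unittest

class TestConv2DOp_float16(XPUTestConv2DOp.TestConv2DOp):  # name is illustrative
    in_type = np.float16

if __name__ == '__main__':
    unittest.main()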