Unverified commit 355caee1, authored by wangchaochaohu and committed by GitHub

fix conv_transpose Op fp16 error test=develop (#24695)

Parent 19e5f787
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
}
#endif
if (!exhaustive) {
if (!exhaustive && !deterministic) {
#if CUDNN_VERSION >= 7001
int perf_count;
int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
workspace_size_limit, &algo));
#endif
VLOG(3) << "choose algo " << algo;
} else if (deterministic) {
algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
} else {
auto& dev_ctx =
ctx.template device_context<platform::CUDADeviceContext>();
......
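Editor's note: the hunk above makes a deterministic request short-circuit cuDNN forward-algorithm selection instead of running the heuristic query or the exhaustive benchmark. The sketch below shows that intended control flow only; PickForwardAlgo and its boolean parameters are illustrative names, not Paddle's API, and the hard-coded index 1 maps to CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM in cudnn.h.

#include <cudnn.h>

// Illustrative control flow, not Paddle's SearchAlgorithm struct:
// a deterministic request bypasses both search paths and returns a
// fixed, deterministic algorithm.
cudnnConvolutionFwdAlgo_t PickForwardAlgo(bool exhaustive, bool deterministic) {
  if (!exhaustive && !deterministic) {
    // Heuristic path: the real code queries
    // cudnnGetConvolutionForwardAlgorithm_v7 for ranked candidates and
    // takes the best one that fits the workspace limit.
    return CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;  // stand-in result
  } else if (deterministic) {
    // Index 1 is CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
    // a deterministic algorithm, matching the hard-coded cast above.
    return static_cast<cudnnConvolutionFwdAlgo_t>(1);
  }
  // Exhaustive path: the real code benchmarks candidates with
  // cudnnFindConvolutionForwardAlgorithmEx and caches the winner.
  return CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;  // stand-in result
}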
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
int output_offset =
transformed_output.numel() / transformed_output.dims()[0] / groups;
int filter_offset = filter->numel() / groups;
T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
ScalingParamType<T> alpha = 1.0f;
ScalingParamType<T> beta = 0.0f;
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
for (int g = 0; g < groups; g++) {
auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
int output_grad_offset = transformed_output_grad.numel() /
transformed_output_grad.dims()[0] / groups;
int filter_offset = filter->numel() / groups;
T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
ScalingParamType<T> alpha = 1.0f;
ScalingParamType<T> beta = 0.0f;
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
if (input_grad) {
// Because beta is zero, it is unnecessary to reset input_grad.
......
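Editor's note: the two kernel hunks above replace the "T alpha, beta" declarations with ScalingParamType<T>. cuDNN requires the alpha/beta scaling factors to be given in the compute type (float for half-precision and float tensors, double for double tensors), so declaring them as T breaks the FP16 path. Below is a minimal sketch of such a scaling-type trait; the name ScalingParam and the __half specialization are illustrative assumptions, not Paddle's actual platform::CudnnDataType definition.

#include <cuda_fp16.h>

// Illustrative trait, not Paddle's ScalingParamType: pick the type cuDNN
// expects for the alpha/beta scaling factors of a convolution call.
template <typename T>
struct ScalingParam {
  using type = T;  // float and double scale with themselves
};

template <>
struct ScalingParam<__half> {
  using type = float;  // FP16 tensor data still uses float alpha and beta
};

// A kernel body would then declare the scalars like this:
//   typename ScalingParam<T>::type alpha = 1.0f, beta = 0.0f;
// and pass &alpha / &beta to the cuDNN convolution routines.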
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.dtype = np.float64
self.need_check_grad = True
self.is_test = False
self.use_cudnn = False
self.use_mkldnn = False
@@ -152,35 +153,40 @@ class TestConv2dTransposeOp(OpTest):
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_input(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
else:
self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
if self.need_check_grad:
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
else:
self.check_grad(
['Filter'], 'Output', no_grad_set=set(['Input']))
def test_check_grad_no_filter(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'], 'Output', no_grad_set=set(['Filter']))
else:
self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
if self.need_check_grad:
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'], 'Output', no_grad_set=set(['Filter']))
else:
self.check_grad(
['Input'], 'Output', no_grad_set=set(['Filter']))
def test_check_grad(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.02)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
if self.need_check_grad:
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.02)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def init_test_case(self):
self.pad = [0, 0]
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNN_FP16(TestConv2dTransposeOp):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [1, 1]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.need_check_grad = False
self.use_cudnn = True
self.op_type = "conv2d_transpose"
def test_check_output(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(
place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
else:
self.check_output(check_dygraph=(self.use_mkldnn == False))
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [0, 0]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 1
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [1, 1]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 0, 2, 3]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 2
self.input_size = [2, 5, 5, 4] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 3, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [2, 2]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.output_size = [14, 14]
self.input_size = [2, 7, 7, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 5, 5]
self.data_format = 'NHWC'
class TestConv2dTransposeAPI(unittest.TestCase):
def test_case1(self):
data1 = fluid.layers.data(
......
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
'fused_elemwise_activation', \
'pool2d', \
'pool3d', \
'softmax'
'softmax',\
'conv2d_transpose'
]