Unverified commit 863f9e55, authored by wangchaochaohu, committed by GitHub

fix conv_transpose Op fp16 error test=develop (#24695) (#24784)

Parent 627d5567
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
    }
#endif
-    if (!exhaustive) {
+    if (!exhaustive && !deterministic) {
#if CUDNN_VERSION >= 7001
      int perf_count;
      int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
          workspace_size_limit, &algo));
#endif
      VLOG(3) << "choose algo " << algo;
+    } else if (deterministic) {
+      algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
    } else {
      auto& dev_ctx =
          ctx.template device_context<platform::CUDADeviceContext>();
...
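The hunk above turns algorithm choice into three mutually exclusive branches: heuristic selection, a pinned deterministic algorithm, and exhaustive search. The following self-contained sketch mirrors that branch structure; it is illustrative only (cuDNN is stubbed out with a two-value enum, and ChooseAlgo is an invented name, not Paddle's). In cuDNN, the value 1 used by the deterministic branch corresponds to CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM.

// Sketch only: the enum stands in for cuDNN's cudnnConvolutionFwdAlgo_t.
#include <iostream>

enum cudnnConvolutionFwdAlgo_t {
  CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
  CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
};

cudnnConvolutionFwdAlgo_t ChooseAlgo(bool exhaustive, bool deterministic) {
  if (!exhaustive && !deterministic) {
    // Default path: the real code asks cuDNN's heuristic
    // (cudnnGetConvolutionForwardAlgorithm_v7) for the fastest algorithm
    // that fits the workspace limit.
    return CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
  } else if (deterministic) {
    // The new branch: pin a single algorithm so the choice does not vary
    // between runs.
    return static_cast<cudnnConvolutionFwdAlgo_t>(1);
  } else {
    // Exhaustive path: the real code benchmarks candidates
    // (cudnnFindConvolutionForwardAlgorithmEx) and caches the winner.
    return CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
  }
}

int main() {
  std::cout << "deterministic -> algo " << ChooseAlgo(false, true) << std::endl;
  return 0;
}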
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
    int output_offset =
        transformed_output.numel() / transformed_output.dims()[0] / groups;
    int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
    auto workspace_handle = dev_ctx.cudnn_workspace_handle();
    for (int g = 0; g < groups; g++) {
      auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
    int output_grad_offset = transformed_output_grad.numel() /
                             transformed_output_grad.dims()[0] / groups;
    int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
    auto workspace_handle = dev_ctx.cudnn_workspace_handle();
    if (input_grad) {
      // Because beta is zero, it is unnecessary to reset input_grad.
...
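The alpha/beta change in both kernels is the heart of the fp16 fix: cuDNN documents that the scaling factors for half-precision tensors must be passed as plain float values, so declaring them as T (i.e. as float16) feeds cuDNN malformed scalars. Below is a minimal sketch of the trait idea behind ScalingParamType<T>; the struct names and the float16 stand-in are illustrative, not Paddle's actual definitions.

// Sketch of a ScalingParamType-style trait: half-precision data uses float
// scaling factors, every other type keeps its own type.
#include <cstdint>
#include <cstdio>

struct float16 { uint16_t bits; };  // stand-in for platform::float16

template <typename T>
struct ScalingParam { using type = T; };

template <>
struct ScalingParam<float16> { using type = float; };  // fp16 data -> float alpha/beta

template <typename T>
using ScalingParamType = typename ScalingParam<T>::type;

int main() {
  // Plain floats, which is what cuDNN expects for half-precision tensors.
  ScalingParamType<float16> alpha = 1.0f, beta = 0.0f;
  std::printf("alpha=%.1f beta=%.1f bytes=%zu\n",
              static_cast<double>(alpha), static_cast<double>(beta), sizeof(alpha));
  return 0;
}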
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
    def setUp(self):
        # init as conv transpose
        self.dtype = np.float64
+        self.need_check_grad = True
        self.is_test = False
        self.use_cudnn = False
        self.use_mkldnn = False
@@ -152,35 +153,40 @@ class TestConv2dTransposeOp(OpTest):
        self.check_output(check_dygraph=(self.use_mkldnn == False))

    def test_check_grad_no_input(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Filter'],
-                'Output',
-                max_relative_error=0.02,
-                no_grad_set=set(['Input']))
-        else:
-            self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Filter'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Input']))
+            else:
+                self.check_grad(
+                    ['Filter'], 'Output', no_grad_set=set(['Input']))

    def test_check_grad_no_filter(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input'], 'Output', no_grad_set=set(['Filter']))
-        else:
-            self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Input'], 'Output', no_grad_set=set(['Filter']))
+            else:
+                self.check_grad(
+                    ['Input'], 'Output', no_grad_set=set(['Filter']))

    def test_check_grad(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place,
-                set(['Input', 'Filter']),
-                'Output',
-                max_relative_error=0.02)
-        else:
-            self.check_grad(
-                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place,
+                    set(['Input', 'Filter']),
+                    'Output',
+                    max_relative_error=0.02)
+            else:
+                self.check_grad(
+                    set(['Input', 'Filter']), 'Output', max_relative_error=0.02)

    def init_test_case(self):
        self.pad = [0, 0]
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
        self.data_format = 'NHWC'


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_FP16(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        self.need_check_grad = False
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+    def test_check_output(self):
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(
+                place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
+        else:
+            self.check_output(check_dygraph=(self.use_mkldnn == False))


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 0, 2, 3]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [2, 5, 5, 4]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 3, 3, 3]
+        self.data_format = 'NHWC'


+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]
+        self.input_size = [2, 7, 7, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 5, 5]
+        self.data_format = 'NHWC'


class TestConv2dTransposeAPI(unittest.TestCase):
    def test_case1(self):
        data1 = fluid.layers.data(
...
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
    'fused_elemwise_activation', \
    'pool2d', \
    'pool3d', \
-    'softmax'
+    'softmax', \
+    'conv2d_transpose'
]