Unverified · Commit 355caee1, authored by W wangchaochaohu, committed via GitHub

fix conv_transpose Op fp16 error test=develop (#24695)

Parent 19e5f787
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
     }
 #endif
-    if (!exhaustive) {
+    if (!exhaustive && !deterministic) {
 #if CUDNN_VERSION >= 7001
       int perf_count;
       int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
                 workspace_size_limit, &algo));
 #endif
       VLOG(3) << "choose algo " << algo;
+    } else if (deterministic) {
+      algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
     } else {
       auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
...
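Taken together, the two hunks above leave three ways to pick the forward algorithm: the heuristic path when neither exhaustive search nor determinism is requested, a fixed choice when determinism is requested, and the exhaustive search otherwise. A minimal sketch of that control flow, not the PaddlePaddle source: the function name and the `heuristic_pick`/`searched_pick` arguments are placeholders for what the real code obtains from cudnnGetConvolutionForwardAlgorithm_v7 (on cuDNN >= 7001) or from the exhaustive cudnnFindConvolutionForwardAlgorithmEx search.

// Sketch only; algo index 1 is CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
// a deterministic algorithm, which is why the deterministic branch pins it.
#include <cudnn.h>

cudnnConvolutionFwdAlgo_t ChooseFwdAlgo(bool exhaustive, bool deterministic,
                                        cudnnConvolutionFwdAlgo_t heuristic_pick,
                                        cudnnConvolutionFwdAlgo_t searched_pick) {
  if (!exhaustive && !deterministic) {
    return heuristic_pick;  // heuristic query path (cudnnGetConvolutionForwardAlgorithm*)
  } else if (deterministic) {
    return static_cast<cudnnConvolutionFwdAlgo_t>(1);  // fixed, reproducible choice
  } else {
    return searched_pick;   // exhaustive search path (cudnnFind*Ex)
  }
}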
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     int output_offset =
         transformed_output.numel() / transformed_output.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     for (int g = 0; g < groups; g++) {
       auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     int output_grad_offset = transformed_output_grad.numel() /
                              transformed_output_grad.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     if (input_grad) {
       // Because beta is zero, it is unnecessary to reset input_grad.
...
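The alpha/beta type is the core of the fp16 fix: cuDNN expects the scaling factors to be float when the tensor data type is half or float, and double only for double tensors, so declaring them as T breaks once T is float16. A simplified stand-in for what a ScalingParamType-style trait resolves to; the real alias used in the hunk above comes from Paddle's cuDNN helper (CudnnDataType<T>::ScalingParamType), and the struct and function names below are illustrative only.

// Simplified illustration, not Paddle's implementation.
template <typename T>
struct ScalingParam {          // half and float tensors both scale with float
  using type = float;
};
template <>
struct ScalingParam<double> {  // double tensors scale with double
  using type = double;
};

// Usage inside a kernel templated on T (e.g. T = a float16 type):
template <typename T>
void DeclareScalingFactors() {
  typename ScalingParam<T>::type alpha = 1.0f, beta = 0.0f;
  (void)alpha;
  (void)beta;
}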
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
         self.dtype = np.float64
+        self.need_check_grad = True
         self.is_test = False
         self.use_cudnn = False
         self.use_mkldnn = False
@@ -152,6 +153,7 @@ class TestConv2dTransposeOp(OpTest):
             self.check_output(check_dygraph=(self.use_mkldnn == False))

     def test_check_grad_no_input(self):
+        if self.need_check_grad:
             if self.use_cudnn:
                 place = core.CUDAPlace(0)
                 self.check_grad_with_place(
@@ -160,17 +162,21 @@ class TestConv2dTransposeOp(OpTest):
                     max_relative_error=0.02,
                     no_grad_set=set(['Input']))
             else:
-                self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
+                self.check_grad(
+                    ['Filter'], 'Output', no_grad_set=set(['Input']))

     def test_check_grad_no_filter(self):
+        if self.need_check_grad:
             if self.use_cudnn:
                 place = core.CUDAPlace(0)
                 self.check_grad_with_place(
                     place, ['Input'], 'Output', no_grad_set=set(['Filter']))
             else:
-                self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
+                self.check_grad(
+                    ['Input'], 'Output', no_grad_set=set(['Filter']))

     def test_check_grad(self):
+        if self.need_check_grad:
             if self.use_cudnn:
                 place = core.CUDAPlace(0)
                 self.check_grad_with_place(
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
         self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_FP16(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        self.need_check_grad = False
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+    def test_check_output(self):
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(
+                place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
+        else:
+            self.check_output(check_dygraph=(self.use_mkldnn == False))
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 0, 2, 3]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [2, 5, 5, 4]  # NCHW
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 3, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]
+        self.input_size = [2, 7, 7, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 5, 5]
+        self.data_format = 'NHWC'
 
 class TestConv2dTransposeAPI(unittest.TestCase):
     def test_case1(self):
         data1 = fluid.layers.data(
...
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
     'fused_elemwise_activation', \
     'pool2d', \
     'pool3d', \
-    'softmax'
+    'softmax',\
+    'conv2d_transpose'
 ]
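The new fp16 test classes above only compare forward outputs against the reference with atol=0.02 and opt out of gradient checking (need_check_grad = False plus the conv2d_transpose entry in NO_FP16_CHECK_GRAD_OP_LIST). A small, self-contained numpy sketch, separate from the test suite, of why a loose absolute tolerance is the right fit once data passes through half precision:

import numpy as np

# Round-trip representative values in [-1, 1] through float16 and measure the error.
x = np.linspace(-1.0, 1.0, 201, dtype=np.float64)
roundtrip = x.astype(np.float16).astype(np.float64)
print(np.max(np.abs(x - roundtrip)))  # on the order of 1e-4 per value

# A conv_transpose output accumulates many such rounded products, so the test
# uses check_output_with_place(place, atol=0.02) instead of fp64-level
# tolerances, and skips gradient checks for float16 altogether.
assert np.max(np.abs(x - roundtrip)) < 0.02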