未验证 提交 a94c3cbb 编写于 作者: L LielinJiang 提交者: GitHub

register cudnn conv double grad for depthwise conv (#29807)

上级 01e2874a
......@@ -1177,6 +1177,12 @@ REGISTER_OP_KERNEL(
paddle::operators::CUDNNConvDoubleGradOpKernel<double>,
paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>);
REGISTER_OP_CUDA_KERNEL(
depthwise_conv2d_grad_grad,
paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
paddle::operators::CUDNNConvDoubleGradOpKernel<double>,
paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>);
REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<double>,
......
......@@ -772,7 +772,10 @@ REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType,
ops::Conv2DGradMaker<paddle::framework::OpDesc>,
ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad,
ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);
REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
ops::ConvOpInferVarType,
......
......@@ -471,5 +471,36 @@ class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase):
self.func(p)
class TestDepthWiseConvDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
shape = [2, 4, 3, 3]
eps = 0.005
dtype = np.float64
x = layers.data('x', shape, False, dtype)
# condition of depthwise conv:
# use_cudnn == False
# groups == filters
# num_filters % num_channels == 0
y = layers.conv2d(
x, shape[1], 1, groups=shape[1], bias_attr=False, use_cudnn=False)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
w = fluid.default_main_program().global_block().all_parameters()
w_arr = []
for p in w:
w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
gradient_checker.double_grad_check(
[x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)
def test_grad(self):
places = []
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册