Unverified commit 20bdc3e1, authored by Yibing Liu, committed by GitHub

Merge pull request #10846 from kuke/deconv_group

Add groups for conv transpose ops
......@@ -44,6 +44,7 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
// cudnn v5 does not support dilations
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
int user_workspace_size = ctx.Attr<int>("workspace_size_MB");
const T* input_data = input->data<T>();
......@@ -64,13 +65,13 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
// (N, M, H, W) or (N, M, D, H, W)
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims()));
layout, framework::vectorize2int(input->dims()), groups);
// (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w)
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
layout, framework::vectorize2int(output->dims()));
layout, framework::vectorize2int(output->dims()), groups);
// (M, C, K_h, K_w) or (M, C, K_d, K_h, K_w)
cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
layout, framework::vectorize2int(filter->dims()));
layout, framework::vectorize2int(filter->dims()), groups);
cudnnConvolutionDescriptor_t cudnn_conv_desc =
conv_desc.descriptor<T>(paddings, strides, dilations);
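Context for the three descriptor changes above: each `descriptor<T>(...)` call now receives `groups`, so cuDNN is handed the shape of a single group rather than of the whole tensor. The descriptor helpers live in cudnn_helper.h, outside this diff, so the following sketch of their shape arithmetic is an assumption:

```python
# Assumed per-group shape arithmetic of descriptor<T>(layout, dims, groups).
def tensor_dims_per_group(dims, groups):
    d = list(dims)      # NCHW or NCDHW
    d[1] //= groups     # the channel axis is split across groups
    return d

def filter_dims_per_group(dims, groups):
    d = list(dims)      # (M, C, K_h, K_w) for conv transpose
    d[0] //= groups     # the leading (input-channel) axis is split
    return d

# groups = 2: input (2, 4, 5, 5) -> [2, 2, 5, 5]; filter (4, 3, 3, 3) -> [2, 3, 3, 3]
print(tensor_dims_per_group([2, 4, 5, 5], 2))
print(filter_dims_per_group([4, 3, 3, 3], 2))
```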
......@@ -104,11 +105,17 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv transpose forward ---------------------
int input_offset = input->numel() / input->dims()[0] / groups;
int output_offset = output->numel() / output->dims()[0] / groups;
int filter_offset = filter->numel() / groups;
T alpha = 1.0f, beta = 0.0f;
for (int g = 0; g < groups; g++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
handle, &alpha, cudnn_filter_desc, filter_data, cudnn_input_desc,
input_data, cudnn_conv_desc, algo, cudnn_workspace,
workspace_size_in_bytes, &beta, cudnn_output_desc, output_data));
handle, &alpha, cudnn_filter_desc, filter_data + filter_offset * g,
cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc,
algo, cudnn_workspace, workspace_size_in_bytes, &beta,
cudnn_output_desc, output_data + output_offset * g));
}
// Release the cudnn workspace
paddle::memory::Free(gpu, cudnn_workspace);
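The three `*_offset` values above are per-sample, per-group element counts. The tensor descriptors are assumed to keep the full batch stride (C·H·W) while advertising only C/groups channels, so advancing the data pointer by `offset * g` selects group g's channel block for every sample in the batch at once. A small NumPy sketch of that addressing, under the NCHW assumption:

```python
import numpy as np

n, c, h, w, groups = 2, 4, 3, 3, 2
x = np.arange(n * c * h * w).reshape(n, c, h, w)

input_offset = x.size // n // groups      # == (c // groups) * h * w
flat, g = x.reshape(-1), 1
# The pointer handed to cuDNN for group g starts at sample 0's channel block:
assert flat[input_offset * g] == x[0, g * (c // groups), 0, 0]
```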
......@@ -134,6 +141,7 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
// cudnn v5 does not support dilations
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
int user_workspace_size = ctx.Attr<int>("workspace_size_MB");
// ------------------- cudnn descriptors ---------------------
......@@ -145,13 +153,13 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
// Input: (N, M, H, W) or (N, M, D, H, W)
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims()));
layout, framework::vectorize2int(input->dims()), groups);
// Output: (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w)
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
layout, framework::vectorize2int(output_grad->dims()));
layout, framework::vectorize2int(output_grad->dims()), groups);
// Filter (M, C, K_h, K_w) or (M, C, K_d, K_h, K_w)
cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
layout, framework::vectorize2int(filter->dims()));
layout, framework::vectorize2int(filter->dims()), groups);
cudnnConvolutionDescriptor_t cudnn_conv_desc =
conv_desc.descriptor<T>(paddings, strides, dilations);
......@@ -205,15 +213,22 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv backward data ---------------------
// FIXME(typhoonzero): template type T may not be the same as cudnn call.
int input_offset = input->numel() / input->dims()[0] / groups;
int output_grad_offset =
output_grad->numel() / output_grad->dims()[0] / groups;
int filter_offset = filter->numel() / groups;
T alpha = 1.0f, beta = 0.0f;
if (input_grad) {
T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
// Because beta is zero, it is unnecessary to reset input_grad.
for (int g = 0; g < groups; g++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
handle, &alpha, cudnn_output_desc, output_grad_data,
cudnn_filter_desc, filter_data, cudnn_conv_desc, data_algo,
handle, &alpha, cudnn_output_desc,
output_grad_data + output_grad_offset * g, cudnn_filter_desc,
filter_data + filter_offset * g, cudnn_conv_desc, data_algo,
cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_input_desc,
input_grad_data));
input_grad_data + input_offset * g));
}
}
// ------------------- cudnn conv backward filter ---------------------
......@@ -221,11 +236,16 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
// Because beta is zero, it is unnecessary to reset filter_grad.
// Gradient with respect to the filter
for (int g = 0; g < groups; g++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
handle, &alpha, cudnn_output_desc, output_grad_data, cudnn_input_desc,
input_data, cudnn_conv_desc, filter_algo, cudnn_workspace,
workspace_size_in_bytes, &beta, cudnn_filter_desc, filter_grad_data));
handle, &alpha, cudnn_output_desc,
output_grad_data + output_grad_offset * g, cudnn_input_desc,
input_data + input_offset * g, cudnn_conv_desc, filter_algo,
cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_filter_desc,
filter_grad_data + filter_offset * g));
}
}
// Release the cudnn workspace
paddle::memory::Free(gpu, cudnn_workspace);
}
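A note on why the grad kernel's cuDNN calls look swapped: the forward pass of a transposed convolution is cuDNN's backward-data, so its input gradient is an ordinary forward convolution, and its filter gradient is backward-filter with the roles of `input` and `output_grad` exchanged. In the degenerate 1x1, stride-1 case this duality reduces to per-pixel matrix algebra per group; the sketch below is an illustration of that relationship, not Paddle code:

```python
import numpy as np

# Per group: y_g = W_g^T x_g per pixel, hence dx_g = W_g dy_g and dW_g = x_g dy_g^T.
rng = np.random.default_rng(0)
groups, m_g, c_g, pix = 2, 2, 3, 5        # per-group channels, pixel count
x = rng.standard_normal((groups, m_g, pix))
w = rng.standard_normal((groups, m_g, c_g))
dy = rng.standard_normal((groups, c_g, pix))

y = np.einsum('gmc,gmp->gcp', w, x)      # forward  (cudnnConvolutionBackwardData)
dx = np.einsum('gmc,gcp->gmp', w, dy)    # d input  (cudnnConvolutionForward)
dw = np.einsum('gmp,gcp->gmc', x, dy)    # d filter (cudnnConvolutionBackwardFilter)
assert dx.shape == x.shape and dw.shape == w.shape and y.shape == dy.shape
```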
......
......@@ -32,6 +32,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
int groups = ctx->Attrs().Get<int>("groups");
PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
"ConvTransposeOp intput should be 4-D or 5-D tensor.");
......@@ -48,10 +49,10 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
"ConvTransposeOp paddings dimension and dilations "
"dimension should be the same.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
"In ConvTransposeOp, The input channel should be the same "
"as the number of filters.");
"In ConvTransposeOp, The number of input channels should "
"be equal to the number of filter's channels.");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[1]});
std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] +
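Putting the InferShape change together (the truncated expression above presumably closes with `filter_extent)`): the channel dimension becomes `filter_dims[1] * groups`, and each spatial dimension follows the usual transpose formula. A Python mirror of the computation, as a sketch:

```python
def conv_transpose_out_shape(in_dims, filter_dims, strides, paddings,
                             dilations, groups=1):
    # Mirrors ConvTransposeOp::InferShape for NC(D)HW inputs.
    out = [in_dims[0], filter_dims[1] * groups]
    for i in range(len(strides)):
        filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1
        out.append((in_dims[i + 2] - 1) * strides[i]
                   - 2 * paddings[i] + filter_extent)
    return out

# The TestWithGroups numbers: input (2, 4, 5, 5), filter (4, 3, 3, 3), groups 2
print(conv_transpose_out_shape([2, 4, 5, 5], [4, 3, 3, 3],
                               [1, 1], [1, 1], [1, 1], groups=2))  # [2, 6, 5, 5]
```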
......@@ -102,7 +103,10 @@ void Conv2DTransposeOpMaker::Make() {
AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator. "
"The format of output tensor is also NCHW.");
AddAttr<int>("groups",
"(int default:1), the groups number of the convolution "
"transpose operator. ")
.SetDefault(1);
AddAttr<std::vector<int>>("dilations",
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of convolution "
......@@ -204,6 +208,10 @@ void Conv3DTransposeOpMaker::Make() {
"(vector<int> default:{0, 0, 0}), paddings(d_pad, "
"h_pad, w_pad) of convolution transpose operator.")
.SetDefault({0, 0, 0});
AddAttr<int>("groups",
"(int default:1), the groups number of the convolution3d "
"transpose operator. ")
.SetDefault(1);
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
......
......@@ -70,7 +70,7 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
// groups will always be disabled in conv2dtranspose.
int groups = context.Attr<int>("groups");
const int batch_size = static_cast<int>(input->dims()[0]);
......@@ -81,10 +81,10 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
// use col_shape in the im2col and col2im (or vol2col and col2vol)
// calculation
// col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
// col_shape_vec: {c/g, k_h, k_w, h, w} or {c/g, k_d, k_h, k_w, d, h, w}
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = output->dims()[1];
col_shape_vec[0] = output->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = input_shape_vec[j + 2];
......@@ -92,7 +92,7 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation
// size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
// size: (c/g * k_h * k_w, h * w) or (c/g * k_d * k_h * k_w, d * h * w)
DDim col_matrix_shape = framework::flatten_to_2d(col_shape, data_dim + 1);
Tensor col;
......@@ -111,7 +111,7 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
// input matrix size: (m, h * w) or (m, d * h * w)
DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]};
// filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
// filter size: (m, c/g * k_h * k_w) or (m, c/g * k_d * k_h * k_w)
DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]};
filter.Resize(filter_matrix_shape);
......@@ -121,6 +121,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
set_zero(dev_ctx, output, static_cast<T>(0));
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im;
math::Col2VolFunctor<DeviceContext, T> col2vol;
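For orientation: in this GEMM path the transposed convolution first multiplies the filter with the input to produce one (c/g · k_h · k_w)-tall column per input location, then col2im scatter-adds the overlapping k_h × k_w patches into the output (col2vol is the 3-D analogue). A toy stride-1, zero-pad, no-dilation version, as an illustrative sketch rather than the Paddle functor:

```python
import numpy as np

def col2im_naive(col, c, o_h, o_w, k_h, k_w):
    # col: (c * k_h * k_w, h * w) with h = o_h - k_h + 1, w = o_w - k_w + 1
    h, w = o_h - k_h + 1, o_w - k_w + 1
    col = col.reshape(c, k_h, k_w, h, w)
    out = np.zeros((c, o_h, o_w))
    for u in range(k_h):              # scatter-add each kernel offset
        for v in range(k_w):
            out[:, u:u + h, v:v + w] += col[:, u, v]
    return out

# An all-ones column stack: the output center accumulates all 9 patch hits.
print(col2im_naive(np.ones((9, 9)), 1, 5, 5, 3, 3)[0, 2, 2])  # 9.0
```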
......@@ -133,22 +135,29 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
// output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape);
// col_matrix = filter * input_batch
// of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
blas.MatMul(filter, true, input_batch, false, static_cast<T>(1.0),
for (int g = 0; g < groups; g++) {
Tensor in_slice = input_batch.Slice(g * in_step, (g + 1) * in_step);
Tensor filter_slice = filter.Slice(g * in_step, (g + 1) * in_step);
Tensor out_slice = output_batch.Slice(g * out_step, (g + 1) * out_step);
// col_matrix = filter_slice * input_slice
// of shape (c/g * k_h * k_w, h * w)
// or (c/g * k_d * k_h * k_w, d * h * w)
blas.MatMul(filter_slice, true, in_slice, false, static_cast<T>(1.0),
&col_matrix, static_cast<T>(0.0));
if (data_dim == 2U) {
// col2im: col_matrix -> dy
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
// from (c/g * k_h * k_w, h * w) to (c/g, o_h, o_w)
col2im(dev_ctx, col, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&output_batch);
&out_slice);
} else if (data_dim == 3U) {
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
col2vol(dev_ctx, col, dilations, strides, paddings, &output_batch);
// from (c/g * k_d * k_h * k_w, d * h * w) to (c/g, o_d, o_h, o_w)
col2vol(dev_ctx, col, dilations, strides, paddings, &out_slice);
}
}
}
}
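The shape bookkeeping of the new group loop, restated as a NumPy sketch (names mirror the kernel; the values are arbitrary):

```python
import numpy as np

m, c, h, w, k_h, k_w, groups = 4, 6, 5, 5, 3, 3, 2
in_step, out_step = m // groups, c // groups

filter_ = np.zeros((m, out_step * k_h * k_w))   # (m, c/g * k_h * k_w) after Resize
input_batch = np.zeros((m, h * w))              # (m, h * w)

for g in range(groups):
    filter_slice = filter_[g * in_step:(g + 1) * in_step]  # (m/g, c/g * k_h * k_w)
    in_slice = input_batch[g * in_step:(g + 1) * in_step]  # (m/g, h * w)
    col_matrix = filter_slice.T.dot(in_slice)              # (c/g * k_h * k_w, h * w)
    # col2im/col2vol then scatter-adds col_matrix into output channels
    # [g * c/g, (g + 1) * c/g) of the (c, o_h, o_w) output batch.
    assert col_matrix.shape == (out_step * k_h * k_w, h * w)
```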
......@@ -174,6 +183,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
int groups = context.Attr<int>("groups");
const int batch_size = static_cast<int>(input->dims()[0]);
......@@ -205,9 +215,11 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// input matrix size: (m, h * w) or (m, d * h * w)
DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]};
// filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]};
// filter size: (m, c/g * k_h * k_w) or (m, c/g * k_d * k_h * k_w)
DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0] / groups};
filter.Resize(filter_matrix_shape);
int in_step = static_cast<int>(input->dims()[1]) / groups;
int col_step = static_cast<int>(col_matrix_shape[0]) / groups;
// convolution transpose grad on input:
// im2col + gemm (similar to conv-forward)
......@@ -233,7 +245,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
}
if (filter_grad) { // filter size (m, c, k_h, k_w)
if (filter_grad) { // filter size (m, c/g, k_h, k_w)
filter_grad->mutable_data<T>(context.GetPlace());
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
filter_grad_ = *filter_grad;
......@@ -268,8 +280,17 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// or
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// d, h, w)
blas.MatMul(filter, false, col_matrix, false, static_cast<T>(1.0),
&input_grad_batch, static_cast<T>(0.0));
for (int g = 0; g < groups; g++) {
Tensor input_grad_slice =
input_grad_batch.Slice(g * in_step, (g + 1) * in_step);
Tensor filter_slice = filter.Slice(g * in_step, (g + 1) * in_step);
Tensor col_matrix_slice =
col_matrix.Slice(g * col_step, (g + 1) * col_step);
blas.MatMul(filter_slice, false, col_matrix_slice, false,
static_cast<T>(1.0), &input_grad_slice,
static_cast<T>(0.0));
}
}
if (filter_grad) {
// input batch
......@@ -279,8 +300,17 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// or
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// k_h * k_w)
blas.MatMul(in_batch, false, col_matrix, true, static_cast<T>(1.0),
&filter_grad_, static_cast<T>(1.0));
for (int g = 0; g < groups; g++) {
Tensor in_batch_slice =
in_batch.Slice(g * in_step, (g + 1) * in_step);
Tensor filter_grad_slice =
filter_grad_.Slice(g * in_step, (g + 1) * in_step);
Tensor col_matrix_slice =
col_matrix.Slice(g * col_step, (g + 1) * col_step);
blas.MatMul(in_batch_slice, false, col_matrix_slice, true,
static_cast<T>(1.0), &filter_grad_slice,
static_cast<T>(1.0));
}
}
}
}
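Analogous bookkeeping for the two gradient GEMMs: after the Resize above, filter is (m, c/g · k · k), so slicing its rows by in_step and col_matrix's rows by col_step reproduces the per-group products. A shape sketch:

```python
import numpy as np

m, c, h, w, k, groups = 4, 6, 5, 5, 3, 2
in_step = m // groups                      # rows of filter / input per group
col_step = (c * k * k) // groups           # rows of col_matrix per group

filter_ = np.zeros((m, col_step))          # (m, c/g * k * k) after Resize
col_matrix = np.zeros((c * k * k, h * w))  # im2col of output_grad
in_batch = np.zeros((m, h * w))

g = 0
col_slice = col_matrix[g * col_step:(g + 1) * col_step]             # (c/g*k*k, h*w)
d_input = filter_[g * in_step:(g + 1) * in_step].dot(col_slice)     # (m/g, h*w)
d_filter = in_batch[g * in_step:(g + 1) * in_step].dot(col_slice.T) # (m/g, c/g*k*k)
```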
......
......@@ -1708,6 +1708,7 @@ def conv2d_transpose(input,
padding=0,
stride=1,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
......@@ -1778,6 +1779,12 @@ def conv2d_transpose(input,
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The number of groups of the Conv2d transpose layer. Inspired
by grouped convolution in Alex Krizhevsky's Deep CNN paper: when
groups=2, the first half of the filters is connected only to the first
half of the input channels, while the second half of the filters is
connected only to the second half of the input channels.
Default: groups=1
param_attr(ParamAttr): The parameters of the Conv2d_transpose layer.
Default: None
bias_attr(ParamAttr): Bias parameter of the Conv2d_transpose layer. Default: None
......@@ -1832,7 +1839,8 @@ def conv2d_transpose(input,
filter_size = utils.convert_to_list(filter_size, 2,
'conv2d_transpose.filter_size')
filter_shape = [input_channel, num_filters] + filter_size
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters / groups] + filter_size
img_filter = helper.create_parameter(
dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)
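A minimal usage sketch of the new argument (an assumed call pattern, consistent with the signature above): with four input channels, num_filters=6 and groups=2, the created parameter has shape [4, 6/2, 3, 3] = [4, 3, 3, 3] and the layer outputs six channels.

```python
import paddle.fluid as fluid

data = fluid.layers.data(name='x', shape=[4, 5, 5], dtype='float32')
out = fluid.layers.conv2d_transpose(
    input=data, num_filters=6, filter_size=3, padding=1, groups=2)
```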
......
......@@ -21,8 +21,11 @@ from op_test import OpTest
def conv2dtranspose_forward_naive(input_, filter_, attrs):
in_n, in_c, in_h, in_w = input_.shape
f_c, out_c, f_h, f_w = filter_.shape
f_c, f_out_c, f_h, f_w = filter_.shape
groups = attrs['groups']
assert in_c == f_c
out_c = f_out_c * groups
sub_in_c = in_c / groups
stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[
'dilations']
......@@ -36,15 +39,21 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
for n in range(in_n):
for i in range(in_h):
for j in range(in_w):
input_masked = input_[n, :, i, j] # (c)
input_masked = np.reshape(input_masked, (in_c, 1, 1))
for g in range(groups):
input_masked = input_[n, g * sub_in_c:(g + 1) * sub_in_c,
                      i, j]  # (sub_in_c)
input_masked = np.reshape(input_masked, (sub_in_c, 1, 1))
input_masked = np.tile(input_masked, (1, f_h, f_w))
for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :], axis=0)
for k in range(f_out_c):
tmp_out = np.sum(
input_masked *
filter_[g * sub_in_c:(g + 1) * sub_in_c, k, :, :],
axis=0)
i1, i2 = i * stride[0], i * stride[0] + d_bolck_h
j1, j2 = j * stride[0], j * stride[0] + d_bolck_h
out[n, k, i1:i2:dilations[0], j1:j2:dilations[1]] += tmp_out
out[n, g * f_out_c + k, i1:i2:dilations[0], j1:j2:
dilations[1]] += tmp_out
out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]]
return out
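A quick consistency check against the routine above (a sketch; Python 2 division, as in this test file): a grouped transposed convolution should equal two independent single-group runs on channel slices, concatenated along the channel axis.

```python
import numpy as np

attrs = {'groups': 2, 'strides': [1, 1], 'paddings': [1, 1],
         'dilations': [1, 1]}
x = np.random.random((2, 4, 5, 5)).astype('float32')
w = np.random.random((4, 3, 3, 3)).astype('float32')

whole = conv2dtranspose_forward_naive(x, w, attrs)
parts = [
    conv2dtranspose_forward_naive(x[:, 2 * g:2 * g + 2], w[2 * g:2 * g + 2],
                                  dict(attrs, groups=1)) for g in range(2)
]
assert np.allclose(whole, np.concatenate(parts, axis=1))
```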
......@@ -64,6 +73,7 @@ class TestConv2dTransposeOp(OpTest):
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout'  # TODO(dzhwinter): should be fixed later
......@@ -127,6 +137,7 @@ class TestConv2dTransposeOp(OpTest):
self.pad = [0, 0]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
......@@ -140,16 +151,29 @@ class TestWithPad(TestConv2dTransposeOp):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
class TestWithGroups(TestConv2dTransposeOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 2
self.input_size = [2, 4, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 3, 3, 3]
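The TestWithGroups numbers line up as follows (a small arithmetic check): in_c = 4 splits into two groups of 2, the filter stores f_out_c = 3 output channels per group, and the spatial size is preserved by pad 1 / stride 1 / kernel 3.

```python
in_h, f_h, stride, pad, dilation = 5, 3, 1, 1, 1
groups, f_out_c = 2, 3
out_c = f_out_c * groups                                          # 6
out_h = (in_h - 1) * stride - 2 * pad + dilation * (f_h - 1) + 1  # 5
assert (out_c, out_h) == (6, 5)
```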
class TestWithStride(TestConv2dTransposeOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.dilations = [1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
......@@ -159,6 +183,7 @@ class TestWithDilation(TestConv2dTransposeOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.groups = 1
self.dilations = [2, 2]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
......@@ -176,6 +201,7 @@ class TestCUDNNWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
......@@ -190,6 +216,7 @@ class TestCUDNNWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
......@@ -200,6 +227,21 @@ class TestCUDNNWithStride(TestWithStride):
self.op_type = "conv2d_transpose"
class TestCUDNNWithGroups(TestWithGroups):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 2
self.input_size = [2, 4, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 3, 3, 3]
def init_op_type(self):
self.use_cudnn = True
self.op_type = "conv2d_transpose"
# Please don't remove the following code.
# Currently, CI uses cuDNN v5.0, which does not support dilated conv.
# class TestCUDNNWithDilation(TestWithDilation):
......
......@@ -21,8 +21,11 @@ from op_test import OpTest
def conv3dtranspose_forward_naive(input_, filter_, attrs):
in_n, in_c, in_d, in_h, in_w = input_.shape
f_c, out_c, f_d, f_h, f_w = filter_.shape
f_c, f_out_c, f_d, f_h, f_w = filter_.shape
groups = attrs['groups']
assert in_c == f_c
out_c = f_out_c * groups
sub_in_c = in_c / groups
stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[
'dilations']
......@@ -39,18 +42,23 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs):
for d in range(in_d):
for i in range(in_h):
for j in range(in_w):
input_masked = input_[n, :, d, i, j] # (c)
input_masked = np.reshape(input_masked, (in_c, 1, 1, 1))
for g in range(groups):
input_masked = input_[n, g * sub_in_c:(g + 1) * sub_in_c,
                      d, i, j]  # (sub_in_c)
input_masked = np.reshape(input_masked,
(sub_in_c, 1, 1, 1))
input_masked = np.tile(input_masked, (1, f_d, f_h, f_w))
for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :, :],
for k in range(f_out_c):
tmp_out = np.sum(input_masked * filter_[
g * sub_in_c:(g + 1) * sub_in_c, k, :, :, :],
axis=0)
d1, d2 = d * stride[0], d * stride[0] + d_bolck_d
i1, i2 = i * stride[1], i * stride[1] + d_bolck_h
j1, j2 = j * stride[2], j * stride[2] + d_bolck_w
out[n, k, d1:d2:dilations[0], i1:i2:dilations[1], j1:j2:
dilations[2]] += tmp_out
out[n, g * f_out_c + k, d1:d2:dilations[0], i1:i2:
dilations[1], j1:j2:dilations[2]] += tmp_out
out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w -
pad[2]]
......@@ -72,6 +80,7 @@ class TestConv3dTransposeOp(OpTest):
'strides': self.stride,
'paddings': self.pad,
'dilations': self.dilations,
'groups': self.groups,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout'  # TODO(dzhwinter): should be fixed later
}
......@@ -134,6 +143,7 @@ class TestConv3dTransposeOp(OpTest):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
......@@ -147,16 +157,29 @@ class TestWithPad(TestConv3dTransposeOp):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
class TestWithGroups(TestConv3dTransposeOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 2
self.input_size = [2, 4, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 3, 3, 3, 3]
class TestWithStride(TestConv3dTransposeOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [2, 2, 2]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
......@@ -167,6 +190,7 @@ class TestWithDilation(TestConv3dTransposeOp):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [2, 2, 2]
self.groups = 1
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
......@@ -184,6 +208,7 @@ class TestCUDNNWithPad(TestWithPad):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
......@@ -198,6 +223,7 @@ class TestCUDNNWithStride(TestWithStride):
self.pad = [1, 1, 1]
self.stride = [2, 2, 2]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
......@@ -207,6 +233,21 @@ class TestCUDNNWithStride(TestWithStride):
self.op_type = "conv3d_transpose"
class TestCUDNNWithGroups(TestWithGroups):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 2
self.input_size = [2, 4, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 3, 3, 3, 3]
def init_op_type(self):
self.use_cudnn = True
self.op_type = "conv3d_transpose"
# Please don't remove the following code.
# Currently, CI uses cuDNN v5.0, which does not support dilated conv.
# class TestCUDNNWithDilation(TestWithDilation):
......