提交 21ce7042 编写于 作者: C chengduoZH

Refine conv2d for filter size (1, 1)

上级 b6f9ba48
...@@ -35,6 +35,18 @@ inline int OutputSize(int input_size, int filter_size, int dilation, ...@@ -35,6 +35,18 @@ inline int OutputSize(int input_size, int filter_size, int dilation,
1; 1;
return output_size; return output_size;
} }
// Returns true when, for each of the first strides.size() dimensions, the
// filter extent is 1, the stride is 1, the padding is 0 and the dilation is 1.
// Callers use the result to skip the im2col/vol2col expansion and feed the
// input slice to the matrix multiplication directly.
//
// filter_dim : filter extents, indexed in step with strides.
// strides    : stride per dimension; its length sets how many dimensions
//              are inspected.
// paddings   : padding per dimension.
// dilations  : dilation per dimension.
//
// Returns false if any of filter_dim, paddings or dilations has fewer
// entries than strides, so no element is ever read out of bounds.
inline bool NotExpand(const std::vector<int64_t>& filter_dim,
                      const std::vector<int>& strides,
                      const std::vector<int>& paddings,
                      const std::vector<int>& dilations) {
  const size_t rank = strides.size();
  if (filter_dim.size() < rank || paddings.size() < rank ||
      dilations.size() < rank) {
    return false;
  }
  for (size_t j = 0; j < rank; ++j) {
    // Compare the int64 extent directly: narrowing it to int first could
    // turn a very large extent into 1.
    if (filter_dim[j] != 1 || strides[j] != 1 || paddings[j] != 0 ||
        dilations[j] != 1) {
      return false;
    }
  }
  return true;
}
// Define Op classes in .h file so that other conv // Define Op classes in .h file so that other conv
// operator implementations can reuse the code. // operator implementations can reuse the code.
...@@ -110,14 +122,17 @@ class GemmConvKernel : public framework::OpKernel<T> { ...@@ -110,14 +122,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
framework::DDim col_matrix_shape = framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
bool not_expand = NotExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col; Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col, // col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape // but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface. // to call the matrix multiplication interface.
Tensor col_matrix; Tensor col_matrix;
col_matrix.ShareDataWith(col); if (!not_expand) {
col_matrix.Resize(col_matrix_shape); col.mutable_data<T>(col_shape, context.GetPlace());
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim( framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size())); input->dims(), 1, static_cast<int>(input->dims().size()));
...@@ -134,31 +149,51 @@ class GemmConvKernel : public framework::OpKernel<T> { ...@@ -134,31 +149,51 @@ class GemmConvKernel : public framework::OpKernel<T> {
int in_step = static_cast<int>(input->dims()[1]) / groups; int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups; int out_step = static_cast<int>(output->dims()[1]) / groups;
for (int i = 0; i < batch_size; i++) { if (!not_expand) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); for (int i = 0; i < batch_size; i++) {
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) { Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (filter_shape_vec.size() == 2) {
// im2col if (filter_shape_vec.size() == 2) {
math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col; // im2col
im2col(context.device_context(), in_slice, col, dilations[0], math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
dilations[1], strides[0], strides[1], paddings[0], paddings[0], im2col(context.device_context(), in_slice, col, dilations[0],
paddings[1], paddings[1]); dilations[1], strides[0], strides[1], paddings[0],
} else if (filter_shape_vec.size() == 3) { paddings[0], paddings[1], paddings[1]);
// vol2col } else if (filter_shape_vec.size() == 3) {
math::Vol2ColFunctor<Place, T> vol2col; // vol2col
vol2col(context.device_context(), in_slice, col, strides[0], math::Vol2ColFunctor<Place, T> vol2col;
strides[1], strides[2], paddings[0], paddings[1], vol2col(context.device_context(), in_slice, col, strides[0],
paddings[2]); strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), filter_slice, false,
col_matrix, false, T(1.0), &out_slice, T(0.0));
} }
}
} else {
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
// gemm col.ShareDataWith(in_slice);
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); col_matrix.ShareDataWith(col);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); col_matrix.Resize(col_matrix_shape);
math::matmul<Place, T>(context.device_context(), filter_slice, false,
col_matrix, false, T(1.0), &out_slice, T(0.0)); // gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), filter_slice, false,
col_matrix, false, T(1.0), &out_slice, T(0.0));
}
} }
} }
} }
...@@ -235,14 +270,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> { ...@@ -235,14 +270,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
int in_step = static_cast<int>(input->dims()[1]) / groups; int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output_grad->dims()[1]) / groups; int out_step = static_cast<int>(output_grad->dims()[1]) / groups;
bool not_expand = NotExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col; Tensor col;
// col_matrix shares the same piece of data with col, // col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape // but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface. // to call the matrix multiplication interface.
Tensor col_matrix; Tensor col_matrix;
col.mutable_data<T>(col_shape, context.GetPlace()); if (!not_expand) {
col_matrix.ShareDataWith(col); col.mutable_data<T>(col_shape, context.GetPlace());
col_matrix.Resize(col_matrix_shape); col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
math::SetConstant<Place, T> set_zero; math::SetConstant<Place, T> set_zero;
...@@ -250,33 +288,60 @@ class GemmConvGradKernel : public framework::OpKernel<T> { ...@@ -250,33 +288,60 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(context.GetPlace()); input_grad->mutable_data<T>(context.GetPlace());
set_zero(context.device_context(), input_grad, static_cast<T>(0)); set_zero(context.device_context(), input_grad, static_cast<T>(0));
for (int i = 0; i < batch_size; i++) { if (!not_expand) {
Tensor out_grad_batch = for (int i = 0; i < batch_size; i++) {
output_grad->Slice(i, i + 1).Resize(output_matrix_shape); Tensor out_grad_batch =
Tensor in_grad_batch = input_grad->Slice(i, i + 1).Resize(input_shape); output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) { Tensor in_grad_batch =
// gemm input_grad->Slice(i, i + 1).Resize(input_shape);
Tensor out_grad_slice = for (int g = 0; g < groups; g++) {
out_grad_batch.Slice(g * out_step, (g + 1) * out_step); // gemm
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); Tensor out_grad_slice =
math::matmul<Place, T>(context.device_context(), filter_slice, true, out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
out_grad_slice, false, T(1.0), &col_matrix, Tensor filter_slice =
T(0.0)); filter.Slice(g * out_step, (g + 1) * out_step);
// col2im math::matmul<Place, T>(context.device_context(), filter_slice, true,
Tensor in_grad_slice = out_grad_slice, false, T(1.0), &col_matrix,
in_grad_batch.Slice(g * in_step, (g + 1) * in_step); T(0.0));
Tensor in_grad_slice =
if (filter_shape_vec.size() == 2) { in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
col2im(context.device_context(), in_grad_slice, col, dilations[0], if (filter_shape_vec.size() == 2) {
dilations[1], strides[0], strides[1], paddings[0], math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
paddings[0], paddings[1], paddings[1]); col2im(context.device_context(), in_grad_slice, col, dilations[0],
dilations[1], strides[0], strides[1], paddings[0],
} else if (filter_shape_vec.size() == 3) { paddings[0], paddings[1], paddings[1]);
math::Col2VolFunctor<Place, T> col2vol;
col2vol(context.device_context(), in_grad_slice, col, strides[0], } else if (filter_shape_vec.size() == 3) {
strides[1], strides[2], paddings[0], paddings[1], math::Col2VolFunctor<Place, T> col2vol;
paddings[2]); col2vol(context.device_context(), in_grad_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
}
}
} else {
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_grad_batch =
input_grad->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice =
filter.Slice(g * out_step, (g + 1) * out_step);
Tensor in_grad_slice =
in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
col_matrix.ShareDataWith(in_grad_slice);
col_matrix.Resize(col_matrix_shape);
math::matmul<Place, T>(context.device_context(), filter_slice, true,
out_grad_slice, false, T(1.0), &col_matrix,
T(0.0));
} }
} }
} }
...@@ -288,34 +353,59 @@ class GemmConvGradKernel : public framework::OpKernel<T> { ...@@ -288,34 +353,59 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
filter_grad_.Resize(filter_matrix_shape); filter_grad_.Resize(filter_matrix_shape);
set_zero(context.device_context(), filter_grad, static_cast<T>(0)); set_zero(context.device_context(), filter_grad, static_cast<T>(0));
for (int i = 0; i < batch_size; i++) { if (!not_expand) {
Tensor out_grad_batch = for (int i = 0; i < batch_size; i++) {
output_grad->Slice(i, i + 1).Resize(output_matrix_shape); Tensor out_grad_batch =
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
// im2col for (int g = 0; g < groups; g++) {
Tensor out_grad_slice = // im2col
out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor out_grad_slice =
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (filter_shape_vec.size() == 2) {
math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col; if (filter_shape_vec.size() == 2) {
im2col(context.device_context(), in_slice, col, dilations[0], math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
dilations[1], strides[0], strides[1], paddings[0], im2col(context.device_context(), in_slice, col, dilations[0],
paddings[0], paddings[1], paddings[1]); dilations[1], strides[0], strides[1], paddings[0],
} else if (filter_shape_vec.size() == 3) { paddings[0], paddings[1], paddings[1]);
math::Vol2ColFunctor<Place, T> vol2col; } else if (filter_shape_vec.size() == 3) {
vol2col(context.device_context(), in_slice, col, strides[0], math::Vol2ColFunctor<Place, T> vol2col;
strides[1], strides[2], paddings[0], paddings[1], vol2col(context.device_context(), in_slice, col, strides[0],
paddings[2]); strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), out_grad_slice,
false, col_matrix, true, T(1.0),
&filter_grad_slice, T(1.0));
}
}
} else {
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// im2col
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), out_grad_slice,
false, col_matrix, true, T(1.0),
&filter_grad_slice, T(1.0));
} }
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), out_grad_slice,
false, col_matrix, true, T(1.0),
&filter_grad_slice, T(1.0));
} }
} }
} }
......
...@@ -104,6 +104,25 @@ class TestWithGroup(TestConv2dOp): ...@@ -104,6 +104,25 @@ class TestWithGroup(TestConv2dOp):
self.op_type = "conv2d" self.op_type = "conv2d"
class TestWith1x1(TestConv2dOp):
    """conv2d case with a 1x1 filter, zero padding, unit stride and unit dilation."""

    def init_test_case(self):
        self.pad = [0, 0]
        self.stride = [1, 1]
        self.input_size = [2, 3, 5, 5]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        # Floor division keeps the per-group channel count an int; true
        # division would put a float into filter_size on Python 3.
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 1, 1]

    def init_dilation(self):
        self.dilations = [1, 1]

    def init_group(self):
        self.groups = 3

    def init_op_type(self):
        self.op_type = "conv2d"
#----------------Conv2dCudnn---------------- #----------------Conv2dCudnn----------------
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册