提交 eafbbc11 编写于 作者: C chengduoZH

write conv2d and conv3d together

上级 9f7c9875
......@@ -69,6 +69,13 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n")
endif()
# conv_op contains several operators
if ("${TARGET}" STREQUAL "conv_op")
set(pybind_flag 1)
# It's enough to just adding one operator to pybind
file(APPEND ${pybind_file} "USE_OP(conv2d);\n")
endif()
# save_restore_op contains several operators
if ("${TARGET}" STREQUAL "save_restore_op")
set(pybind_flag 1)
......@@ -123,7 +130,7 @@ set(DEPS_OPS
sum_op
pool_op
pool_with_index_op
conv3d_op
conv_op
lstm_op)
......@@ -133,7 +140,7 @@ op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(sum_op DEPS net_op)
op_library(conv3d_op DEPS vol2col)
op_library(conv_op DEPS vol2col)
op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling)
op_library(lstm_op DEPS sequence2batch lstm_compute)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
namespace paddle {
namespace operators {
void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"),
"Input(Filter) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of Conv2DOp should not be null.");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int groups = ctx->Attrs().Get<int>("groups");
int input_channels = in_dims[1];
int output_channels = filter_dims[0];
PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D.");
PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D.");
PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups,
"The number of input channels should be equal to filter "
"channels * groups.");
PADDLE_ENFORCE_EQ(
output_channels % groups, 0,
"The number of output channels should be divided by groups.");
auto output_height =
OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]);
auto output_width =
OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]);
ctx->SetOutputDim("Output",
{in_dims[0], filter_dims[0], output_height, output_width});
}
Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"Input",
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image.");
AddInput("Filter",
"The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"input image channels divided by the groups.");
AddOutput("Output",
"The output tensor of convolution operator."
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
.SetDefault({0, 0});
AddAttr<int>(
"groups",
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half.")
.SetDefault(1);
AddComment(R"DOC(
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
)DOC");
}
void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv2d, ops::Conv2DOp, ops::Conv2DOpMaker, conv2d_grad,
ops::Conv2DOpGrad);
REGISTER_OP_CPU_KERNEL(
conv2d, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv3d_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
conv3d, ops::GemmConv3DKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
conv3d_grad, ops::GemmConvGrad3DKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/vol2col.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class Conv3DOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override;
};
class Conv3DOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override;
};
class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Conv3DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
template <typename Place, typename T>
class GemmConv3DKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input");
// The filter will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter");
Tensor* output = context.Output<Tensor>("Output");
output->mutable_data<T>(context.GetPlace());
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
int groups = context.Attr<int>("groups");
int batch_size = input->dims()[0];
int input_channels = input->dims()[1];
int filter_depth = filter.dims()[filter.dims().size() - 3];
int filter_height = filter.dims()[filter.dims().size() - 2];
int filter_width = filter.dims()[filter.dims().size() - 1];
int output_channels = output->dims()[1];
int output_depth = output->dims()[2];
int output_height = output->dims()[3];
int output_width = output->dims()[4];
paddle::operators::math::Vol2ColFunctor<Place, T> vol2col;
// use col_shape in the vol2col calculation
framework::DDim col_shape = {input_channels / groups,
filter_depth,
filter_height,
filter_width,
output_depth,
output_height,
output_width};
// use col_matrix_shape in the gemm calculation
framework::DDim col_matrix_shape = {
input_channels / groups * filter_depth * filter_height * filter_width,
output_depth * output_height * output_width};
Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix = col;
col_matrix.Resize(col_matrix_shape);
framework::DDim input_shape = {
input->dims()[1], input->dims()[2], input->dims()[3],
input->dims()[4]}; // channel, depth, height, width
framework::DDim filter_matrix_shape = {
filter.dims()[0],
filter.numel() / filter.dims()[0]}; // filter_out_channel,
// filter_in_channel*filter_depth*filter_height*filter_width
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output_channels, output_depth * output_height * output_width};
// convolution operator: vol2col + gemm
int in_step = input_channels / groups;
int out_step = output_channels / groups;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
// vol2col
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
vol2col(context.device_context(), in_slice, col, strides[0], strides[1],
strides[2], paddings[0], paddings[1], paddings[2]);
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), filter_slice, false,
col_matrix, false, T(1.0), &out_slice, T(0.0));
}
}
}
};
template <typename Place, typename T>
class GemmConvGrad3DKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input");
const Tensor* output_grad =
context.Input<Tensor>(framework::GradVarName("Output"));
Tensor* input_grad =
context.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad =
context.Output<Tensor>(framework::GradVarName("Filter"));
// The filter and filter_grad will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
int groups = context.Attr<int>("groups");
int batch_size = input->dims()[0];
int input_channels = input->dims()[1];
int filter_depth = filter.dims()[filter.dims().size() - 3];
int filter_height = filter.dims()[filter.dims().size() - 2];
int filter_width = filter.dims()[filter.dims().size() - 1];
int output_channels = output_grad->dims()[1];
int output_depth = output_grad->dims()[2];
int output_height = output_grad->dims()[3];
int output_width = output_grad->dims()[4];
paddle::operators::math::Col2VolFunctor<Place, T> col2vol;
paddle::operators::math::Vol2ColFunctor<Place, T> vol2col;
// use col_shape in the vol2col and col2vol calculation
framework::DDim col_shape = {input_channels / groups,
filter_depth,
filter_height,
filter_width,
output_depth,
output_height,
output_width};
// use col_matrix_shape in the gemm calculation
framework::DDim col_matrix_shape = {
input_channels / groups * filter_depth * filter_height * filter_width,
output_depth * output_height * output_width};
Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix = col;
col_matrix.Resize(col_matrix_shape);
framework::DDim input_shape = {
input->dims()[1], input->dims()[2], input->dims()[3],
input->dims()[4]}; // channel, depth, height, width
framework::DDim output_matrix_shape = {output_grad->dims()[1],
output_grad->dims()[2] *
output_grad->dims()[3] *
output_grad->dims()[4]};
framework::DDim filter_matrix_shape = {
filter.dims()[0],
filter.numel() / filter.dims()[0]}; // filter_out_channel,
// filter_in_channel*filter_depth*filter_height*filter_width
filter.Resize(filter_matrix_shape);
// convolution backward input operator: gemm + col2vol
// convolution backward weight operator: vol2col + gemm
int in_step = input_channels / groups;
int out_step = output_channels / groups;
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*input_grad);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_grad_batch = input_grad->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), filter_slice, true,
out_grad_slice, false, T(1.0), &col_matrix,
T(0.0));
// col2vol
Tensor in_grad_slice =
in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
col2vol(context.device_context(), in_grad_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
}
}
if (filter_grad) {
filter_grad->mutable_data<T>(context.GetPlace());
Tensor filter_grad_ = *filter_grad;
filter_grad_.Resize(filter_matrix_shape);
auto t = framework::EigenVector<T>::Flatten(filter_grad_);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// vol2col
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
vol2col(context.device_context(), in_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), out_grad_slice,
false, col_matrix, true, T(1.0),
&filter_grad_slice, T(1.0));
}
}
}
}
};
} // namespace operators
} // namespace paddle
......@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
#include "paddle/operators/conv_op.h"
namespace paddle {
namespace operators {
......@@ -38,8 +38,9 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::Conv2DOpGrad);
REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(
conv_cudnn, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
......
......@@ -15,7 +15,7 @@
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_op.h"
#include "paddle/operators/conv_op.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h"
......
......@@ -12,23 +12,18 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv3d_op.h"
#include "paddle/operators/conv_op.h"
namespace paddle {
namespace operators {
int OutputSizeConv3d(int input_size, int filter_size, int padding, int stride) {
int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
return output_size;
}
void Conv3DOp::InferShape(framework::InferShapeContext* ctx) const {
void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv3DOp should not be null.");
"Input(Input) of ConvOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"),
"Input(Filter) of Conv3DOp should not be null.");
"Input(Filter) of ConvOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of Conv3DOp should not be null.");
"Output(Output) of ConvOp should not be null.");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
......@@ -38,33 +33,65 @@ void Conv3DOp::InferShape(framework::InferShapeContext* ctx) const {
int input_channels = in_dims[1];
int output_channels = filter_dims[0];
PADDLE_ENFORCE_EQ(in_dims.size(), 5, "Conv3DOp input should be 5-D tensor.");
PADDLE_ENFORCE_EQ(filter_dims.size(), 5,
"Conv3DOp filter should be 5-D tensor.");
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
"Conv input dimension and filter dimension should be the same.");
PADDLE_ENFORCE(
in_dims.size() - strides.size() == 2U,
"Conv input dimension and strides dimension should be consistent.");
PADDLE_ENFORCE_EQ(
paddings.size(), strides.size(),
"Conv paddings dimension and Conv strides dimension should be the same.");
PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups,
"The number of input channels should be equal to filter "
"(channels * groups).");
"channels * groups.");
PADDLE_ENFORCE_EQ(
output_channels % groups, 0,
"The number of output channels should be divided by groups.");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < paddings.size(); ++i) {
output_shape.push_back(OutputSizeConv3d(in_dims[i + 2], filter_dims[i + 2],
paddings[i], strides[i]));
output_shape.push_back(OutputSize(in_dims[i + 2], filter_dims[i + 2],
paddings[i], strides[i]));
}
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
void Conv3DOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"Input",
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image.");
AddInput("Filter",
"The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"input image channels divided by the groups.");
AddOutput("Output",
"The output tensor of convolution operator."
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
.SetDefault({0, 0});
AddAttr<int>(
"groups",
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half.")
.SetDefault(1);
AddComment(R"DOC(
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
)DOC");
}
Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
......@@ -125,12 +152,31 @@ Example:
)DOC");
}
void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv3d, ops::Conv3DOp, ops::Conv3DOpMaker, conv3d_grad,
ops::Conv3DOpGrad);
REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad,
ops::ConvOpGrad);
namespace ops = paddle::operators;
REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad,
ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(
conv2d, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv3d, ops::GemmConv3DKernel<paddle::platform::CPUPlace, float>);
......
......@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
#include "paddle/operators/conv_op.h"
namespace ops = paddle::operators;
......@@ -20,3 +20,8 @@ REGISTER_OP_GPU_KERNEL(
conv2d, ops::GemmConv2DKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
conv3d, ops::GemmConv3DKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
conv3d_grad, ops::GemmConvGrad3DKernel<paddle::platform::GPUPlace, float>);
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/im2col.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/vol2col.h"
namespace paddle {
namespace operators {
......@@ -40,14 +41,20 @@ class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker);
};
class Conv2DOp : public framework::OperatorWithKernel {
class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Conv3DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
class ConvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
};
class Conv2DOpGrad : public framework::OperatorWithKernel {
class ConvOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -251,5 +258,218 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
}
};
template <typename Place, typename T>
class GemmConv3DKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input");
// The filter will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter");
Tensor* output = context.Output<Tensor>("Output");
output->mutable_data<T>(context.GetPlace());
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
int groups = context.Attr<int>("groups");
int batch_size = input->dims()[0];
int input_channels = input->dims()[1];
int filter_depth = filter.dims()[filter.dims().size() - 3];
int filter_height = filter.dims()[filter.dims().size() - 2];
int filter_width = filter.dims()[filter.dims().size() - 1];
int output_channels = output->dims()[1];
int output_depth = output->dims()[2];
int output_height = output->dims()[3];
int output_width = output->dims()[4];
paddle::operators::math::Vol2ColFunctor<Place, T> vol2col;
// use col_shape in the vol2col calculation
framework::DDim col_shape = {input_channels / groups,
filter_depth,
filter_height,
filter_width,
output_depth,
output_height,
output_width};
// use col_matrix_shape in the gemm calculation
framework::DDim col_matrix_shape = {
input_channels / groups * filter_depth * filter_height * filter_width,
output_depth * output_height * output_width};
Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix = col;
col_matrix.Resize(col_matrix_shape);
framework::DDim input_shape = {
input->dims()[1], input->dims()[2], input->dims()[3],
input->dims()[4]}; // channel, depth, height, width
framework::DDim filter_matrix_shape = {
filter.dims()[0],
filter.numel() / filter.dims()[0]}; // filter_out_channel,
// filter_in_channel*filter_depth*filter_height*filter_width
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output_channels, output_depth * output_height * output_width};
// convolution operator: vol2col + gemm
int in_step = input_channels / groups;
int out_step = output_channels / groups;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
// vol2col
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
vol2col(context.device_context(), in_slice, col, strides[0], strides[1],
strides[2], paddings[0], paddings[1], paddings[2]);
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), filter_slice, false,
col_matrix, false, T(1.0), &out_slice, T(0.0));
}
}
}
};
template <typename Place, typename T>
class GemmConvGrad3DKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input");
const Tensor* output_grad =
context.Input<Tensor>(framework::GradVarName("Output"));
Tensor* input_grad =
context.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad =
context.Output<Tensor>(framework::GradVarName("Filter"));
// The filter and filter_grad will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
int groups = context.Attr<int>("groups");
int batch_size = input->dims()[0];
int input_channels = input->dims()[1];
int filter_depth = filter.dims()[filter.dims().size() - 3];
int filter_height = filter.dims()[filter.dims().size() - 2];
int filter_width = filter.dims()[filter.dims().size() - 1];
int output_channels = output_grad->dims()[1];
int output_depth = output_grad->dims()[2];
int output_height = output_grad->dims()[3];
int output_width = output_grad->dims()[4];
paddle::operators::math::Col2VolFunctor<Place, T> col2vol;
paddle::operators::math::Vol2ColFunctor<Place, T> vol2col;
// use col_shape in the vol2col and col2vol calculation
framework::DDim col_shape = {input_channels / groups,
filter_depth,
filter_height,
filter_width,
output_depth,
output_height,
output_width};
// use col_matrix_shape in the gemm calculation
framework::DDim col_matrix_shape = {
input_channels / groups * filter_depth * filter_height * filter_width,
output_depth * output_height * output_width};
Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix = col;
col_matrix.Resize(col_matrix_shape);
framework::DDim input_shape = {
input->dims()[1], input->dims()[2], input->dims()[3],
input->dims()[4]}; // channel, depth, height, width
framework::DDim output_matrix_shape = {output_grad->dims()[1],
output_grad->dims()[2] *
output_grad->dims()[3] *
output_grad->dims()[4]};
framework::DDim filter_matrix_shape = {
filter.dims()[0],
filter.numel() / filter.dims()[0]}; // filter_out_channel,
// filter_in_channel*filter_depth*filter_height*filter_width
filter.Resize(filter_matrix_shape);
// convolution backward input operator: gemm + col2vol
// convolution backward weight operator: vol2col + gemm
int in_step = input_channels / groups;
int out_step = output_channels / groups;
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*input_grad);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_grad_batch = input_grad->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), filter_slice, true,
out_grad_slice, false, T(1.0), &col_matrix,
T(0.0));
// col2vol
Tensor in_grad_slice =
in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
col2vol(context.device_context(), in_grad_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
}
}
if (filter_grad) {
filter_grad->mutable_data<T>(context.GetPlace());
Tensor filter_grad_ = *filter_grad;
filter_grad_.Resize(filter_matrix_shape);
auto t = framework::EigenVector<T>::Flatten(filter_grad_);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// vol2col
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
vol2col(context.device_context(), in_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(context.device_context(), out_grad_slice,
false, col_matrix, true, T(1.0),
&filter_grad_slice, T(1.0));
}
}
}
}
};
} // namespace operators
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册