Unverified commit 4a544762, authored by zhaoyuchen2018, committed by GitHub

Add Asypadding for conv fusion. (#21041)

* Add Asypadding for conv fusion.

test=develop

reference: pr/20042

* Fix eigen build link error

* Change back file mode

* Use math function & add more checks.
Parent commit: d2573550
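
The heart of the change: cuDNN's convolution descriptor only takes one padding value per spatial dimension, so an asymmetric pad (different amounts before and after) is split into a symmetric part passed to cuDNN and a remainder applied by explicitly padding the input tensor first. A minimal NumPy sketch of that decomposition (helper name hypothetical, not part of this commit):

import numpy as np

def split_asymmetric_padding(x, paddings):
    # paddings holds [before, after] per spatial dim of an NCHW tensor.
    # Mirrors the padding_common / padding_diff logic in the CUDA kernel below:
    # the symmetric part goes to cuDNN, the leftover is padded into the input.
    data_dim = len(paddings) // 2
    pad_common = []                # symmetric padding handed to cuDNN
    pad_width = [(0, 0), (0, 0)]   # batch and channel dims are never padded
    for i in range(data_dim):
        before, after = paddings[2 * i], paddings[2 * i + 1]
        common = min(before, after)
        pad_common.append(common)
        pad_width.append((before - common, after - common))
    x_padded = np.pad(x, pad_width, mode="constant", constant_values=0.0)
    return x_padded, pad_common

x = np.zeros((2, 3, 10, 10), dtype=np.float32)
x_padded, pad_common = split_asymmetric_padding(x, [0, 0, 1, 2])
print(x_padded.shape, pad_common)  # (2, 3, 10, 11) [0, 1]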
@@ -73,15 +73,85 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
    std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
    std::vector<int> dilations =
        ctx->Attrs().Get<std::vector<int>>("dilations");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
int groups = ctx->Attrs().Get<int>("groups");
-   std::vector<int64_t> oshape({in_dims[0], filter_dims[0]});
-   for (size_t i = 0; i < strides.size(); ++i) {
-     oshape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                     dilations[i], paddings[i], strides[i]));
    framework::DDim in_data_dims;
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());

    PADDLE_ENFORCE_EQ(
in_dims.size() == 4 || in_dims.size() == 5, true,
"ShapeError: Conv_fusion input should be 4-D or 5-D tensor. But "
"received: %u-D Tensor,"
"the shape of Conv_fusion input is [%s]",
in_dims.size(), in_dims);
PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(),
"ShapeError: Conv_fusion input dimension and filter "
"dimension should be the "
"equal."
"But received: the shape of Conv_fusion input is [%s], "
"input dimension of Conv_fusion "
"input is [%d],"
"the shape of filter is [%s], the filter dimension of "
"Conv_fusion is [%d]",
in_dims, in_dims.size(), filter_dims, filter_dims.size());
int in_sub_stride_size = in_dims.size() - strides.size();
PADDLE_ENFORCE_EQ(
in_dims.size() - strides.size() == 2U, true,
"ShapeError: the dimension of input minus the dimension of "
"stride must be euqal to 2."
"But received: the dimension of input minus the dimension "
"of stride is [%d], the"
"input dimension of Conv_fusion is [%d], the shape of Conv_fusion "
"input "
"is [%s], the stride"
"dimension of Conv_fusion is [%d]",
in_sub_stride_size, in_dims.size(), in_dims, strides.size());
const auto input_channels = in_dims[1];
PADDLE_ENFORCE_EQ(
input_channels, filter_dims[1] * groups,
"ShapeError: The number of input channels should be equal to filter "
"channels * groups. But received: the input channels is [%d], the shape"
"of input is [%s], the filter channel is [%d], the shape of filter is "
"[%s],"
"the groups is [%d]",
in_dims[1], in_dims, filter_dims[1], filter_dims, groups);
PADDLE_ENFORCE_EQ(
filter_dims[0] % groups, 0,
"ShapeError: The number of output channels should be divided by groups."
"But received: the output channels is [%d], the shape of filter is [%s]"
"(the first dimension of filter is output channel), the groups is [%d]",
filter_dims[0], filter_dims, groups);
framework::DDim filter_data_dims =
framework::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0]});
output_shape.push_back(filter_dims[0]);
for (size_t i = 0; i < in_data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) &&
(in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
output_shape.push_back(
ConvOutputSize(in_data_dims[i], filter_dims[i + 2], dilations[i],
paddings[2 * i], paddings[2 * i + 1], strides[i]));
}
    }
    PADDLE_ENFORCE_EQ(ctx->HasOutput("Output"), true,
                      "Output(Output) of ConvOp should not be null.");
-   ctx->SetOutputDim("Output", framework::make_ddim(oshape));
    ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
    std::vector<int> channels =
        ctx->Attrs().Get<std::vector<int>>("split_channels");
    if (channels.size()) {
@@ -90,7 +160,8 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
      std::vector<framework::DDim> oshapes;
      oshapes.reserve(channels.size());
      for (size_t i = 0; i < channels.size(); ++i) {
-       oshapes.push_back({oshape[0], channels[i], oshape[2], oshape[3]});
        oshapes.push_back(
            {output_shape[0], channels[i], output_shape[2], output_shape[3]});
      }
      ctx->SetOutputsDim("Outputs", oshapes);
    }
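
For reference, the output size the ConvOutputSize call above computes per spatial dimension follows the usual convolution arithmetic with separate front and back pads; a minimal sketch of the formula, assuming the standard definition:

def conv_output_size(in_size, filter_size, dilation, pad_before, pad_after, stride):
    # effective kernel extent once dilation is applied
    dkernel = dilation * (filter_size - 1) + 1
    return (in_size + pad_before + pad_after - dkernel) // stride + 1

# e.g. a 10-wide input, 3x3 filter, pads (1, 2), stride 1 -> 11 output columns
assert conv_output_size(10, 3, 1, 1, 2, 1) == 11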
......
@@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/conv_cudnn_helper.h"
#include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/operators/math/padding.h"
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
DECLARE_int64(cudnn_exhaustive_search_times); DECLARE_int64(cudnn_exhaustive_search_times);
...@@ -44,6 +47,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -44,6 +47,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_NOT_NULL(bias, "The bias should not be null."); PADDLE_ENFORCE_NOT_NULL(bias, "The bias should not be null.");
auto* residual = ctx.Input<Tensor>("ResidualData"); auto* residual = ctx.Input<Tensor>("ResidualData");
auto* output = ctx.Output<Tensor>("Output"); auto* output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");

@@ -55,11 +59,96 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
    bool exhaustive_search =
        FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
-   const T* input_data = input->data<T>();
    // const T* input_data = input->data<T>();
    const T* filter_data = filter->data<T>();
    const T* bias_data = bias->data<T>();
-   T* output_data = output->mutable_data<T>(ctx.GetPlace());
    // T* output_data = output->mutable_data<T>(ctx.GetPlace());
const std::string padding_algorithm =
ctx.Attr<std::string>("padding_algorithm");
const std::string data_format = ctx.Attr<std::string>("data_format");
Tensor transformed_input_channel(input->type());
Tensor transformed_output(output->type());
T* output_data = nullptr;
transformed_input_channel = *input;
transformed_output = *output;
output_data = transformed_output.data<T>();
    const T* residual_data = residual ? residual->data<T>() : output_data;
// update padding and dilation
auto in_dims = transformed_input_channel.dims();
auto filter_dims = filter->dims();
framework::DDim in_data_dims;
in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
framework::DDim filter_data_dims =
framework::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
int data_dim = strides.size(); // 2d or 3d
bool is_sys_pad = math::IsSymmetricPadding(paddings, data_dim);
Tensor transformed_input;
std::vector<int> padding_common(data_dim, 0);
if (!is_sys_pad) {
std::vector<int> padding_diff(data_dim);
std::vector<int> new_input_shape_vec(data_dim + 2);
new_input_shape_vec[0] = transformed_input_channel.dims()[0];
new_input_shape_vec[1] = transformed_input_channel.dims()[1];
std::vector<int> input_pad(transformed_input_channel.dims().size() * 2,
0);
for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
new_input_shape_vec[i + 2] =
transformed_input_channel.dims()[i + 2] + padding_diff[i];
input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
}
framework::DDim new_input_shape(
framework::make_ddim(new_input_shape_vec));
transformed_input.Resize(new_input_shape);
auto& dev_ctx =
ctx.template device_context<paddle::platform::CUDADeviceContext>();
transformed_input =
ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
new_input_shape, dev_ctx);
const int rank = transformed_input_channel.dims().size();
T pad_value(0.0);
switch (rank) {
case 4: {
math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
ctx, input_pad, transformed_input_channel, pad_value,
&transformed_input);
} break;
case 5: {
math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
ctx, input_pad, transformed_input_channel, pad_value,
&transformed_input);
} break;
default:
PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
}
} else {
transformed_input = transformed_input_channel;
if (paddings.size() == data_dim) {
for (size_t i = 0; i < data_dim; ++i) {
padding_common[i] = paddings[i];
}
} else {
for (size_t i = 0; i < data_dim; ++i) {
padding_common[i] = paddings[2 * i];
}
}
}
const T* input_data = transformed_input.data<T>();
    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;

@@ -74,18 +163,19 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
    }
    cudnnConvolutionDescriptor_t cudnn_conv_desc =
-       conv_desc.descriptor<T>(paddings, strides, dilations);
        conv_desc.descriptor<T>(padding_common, strides, dilations);
    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
        cudnn_conv_desc, groups));
    cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-       layout, framework::vectorize<int>(input->dims()));
        layout, framework::vectorize<int>(transformed_input.dims()));
    cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-       layout, framework::vectorize<int>(output->dims()));
        layout, framework::vectorize<int>(transformed_output.dims()));
    cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
        layout, framework::vectorize<int>(filter->dims()));
    // Now only support NCHW
-   std::vector<int> bias_dim = {1, static_cast<int>(output->dims()[1]), 1, 1};
    std::vector<int> bias_dim = {
        1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
    cudnnTensorDescriptor_t cudnn_bias_desc =
        bias_desc.descriptor<T>(layout, bias_dim);
    cudnnActivationDescriptor_t cudnn_act_desc =

@@ -109,7 +199,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
        cudnn_conv_desc, CUDNN_DEFAULT_MATH));
-   auto x_dims = framework::vectorize(input->dims());
    auto x_dims = framework::vectorize(transformed_input.dims());
    auto f_dims = framework::vectorize(filter->dims());
    if (!exhaustive_search) {
      CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
......
@@ -23,23 +23,47 @@ from op_test import OpTest
from test_conv2d_op import conv2d_forward_naive
def create_test_padding_SAME_class(parent):
class TestPaddingSMAECase(parent):
def init_paddings(self):
self.pad = [0, 0]
self.padding_algorithm = "SAME"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
TestPaddingSMAECase.__name__ = cls_name
globals()[cls_name] = TestPaddingSMAECase
def create_test_padding_VALID_class(parent):
class TestPaddingVALIDCase(parent):
def init_paddings(self):
self.pad = [1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
TestPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestPaddingVALIDCase
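
The two factory helpers above only switch padding_algorithm on an existing test case; the padding the operator then derives follows the usual SAME/VALID rules (VALID drops all padding, SAME pads just enough to keep the output at ceil(input / stride)). A small sketch of the SAME computation as commonly defined, assuming dilation is treated as 1 under this mode:

import math

def same_padding(in_size, filter_size, stride):
    # SAME keeps the output spatial size at ceil(in / stride); an odd total
    # pad puts the extra pixel on the trailing side.
    out_size = math.ceil(in_size / stride)
    pad_total = max((out_size - 1) * stride + filter_size - in_size, 0)
    return pad_total // 2, pad_total - pad_total // 2

print(same_padding(10, 3, 1))  # (1, 1)
print(same_padding(6, 3, 2))   # (0, 1)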
class TestConv2dFusionOp(OpTest):
    def setUp(self):
        self.op_type = "conv2d_fusion"
        self.exhaustive_search = False
-       self.data_format = "AnyLayout"
        self.data_format = "NCHW"
        self.dtype = np.float32
        self.activation = 'relu'
        self.add_bias = True
        self.add_residual_data = True
        self.channels = None
        self.outputs = None
        self.padding_algorithm = "EXIPLICIT"

        self.init_group()
        self.init_dilation()
        self.init_test_case()
        self.init_bias_residual()
        self.init_activation()
        self.init_paddings()
        self.set_search_method()

        conv2d_param = {
@@ -52,7 +76,9 @@ class TestConv2dFusionOp(OpTest):
        filter = np.random.random(self.filter_size).astype(self.dtype)

        self.output, _, _, _, _ = conv2d_forward_naive(
-           input, filter, self.groups, conv2d_param)
            input, filter, self.groups, conv2d_param, self.padding_algorithm,
            self.data_format)
        self.output = self.output.astype(self.dtype)

        self.inputs = {

@@ -84,7 +110,8 @@ class TestConv2dFusionOp(OpTest):
            'data_format': self.data_format,
            'exhaustive_search': self.exhaustive_search,
            'activation': self.activation,
-           'split_channels': self.channels
            'split_channels': self.channels,
            'padding_algorithm': self.padding_algorithm
        }

        self.outputs = {'Output': self.output}

@@ -127,6 +154,10 @@ class TestConv2dFusionOp(OpTest):
    def set_outputs(self):
        pass
def init_paddings(self):
self.pad = [0, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithoutResidual(TestConv2dFusionOp):
    def init_bias_residual(self):

@@ -186,5 +217,156 @@ class TestMultipleOutputs(TestConv2dFusionOp):
        self.outputs['Outputs'] = [('out1', out1), ('out2', out2)]
class TestAsyPadding(TestConv2dFusionOp):
def init_paddings(self):
self.pad = [0, 0, 1, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithPad_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_paddings(self):
self.pad = [2, 1, 3, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithStride_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_paddings(self):
self.pad = [2, 1, 3, 2]
self.padding_algorithm = "EXPLICIT"
class TestWith1x1_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [2, 2, 4, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithGroup_AsyPadding(TestConv2dFusionOp):
def init_group(self):
self.groups = 3
class TestWithDepthWise3x3_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [3, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [8, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 4
def init_paddings(self):
self.pad = [1, 3, 2, 1]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise5x5_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [8, f_c, 5, 5]
def init_group(self):
self.groups = 4
def init_paddings(self):
self.pad = [0, 1, 1, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise7x7_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [2, 2]
self.input_size = [2, 8, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [16, f_c, 7, 7]
def init_group(self):
self.groups = 8
def init_paddings(self):
self.pad = [1, 3, 4, 1]
self.padding_algorithm = "EXPLICIT"
class TestWithDilation_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [0, 1, 3, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithInput1x1Filter1x1_AsyPadding(TestConv2dFusionOp):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [0, 3, 4, 0]
self.padding_algorithm = "EXPLICIT"
create_test_padding_SAME_class(TestAsyPadding)
create_test_padding_SAME_class(TestWithPad_AsyPadding)
create_test_padding_SAME_class(TestWithStride_AsyPadding)
create_test_padding_SAME_class(TestWithGroup_AsyPadding)
create_test_padding_SAME_class(TestWithInput1x1Filter1x1_AsyPadding)
create_test_padding_VALID_class(TestAsyPadding)
create_test_padding_VALID_class(TestWithPad_AsyPadding)
create_test_padding_VALID_class(TestWithStride_AsyPadding)
create_test_padding_VALID_class(TestWithGroup_AsyPadding)
create_test_padding_VALID_class(TestWithInput1x1Filter1x1_AsyPadding)
if __name__ == '__main__':
    unittest.main()