diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h
index a457edfb2354eae313c0ba14ea9a467602af6680..b9bf8a983bd4bbafca9e16a52e576bcbf378924c 100644
--- a/mace/kernels/conv_2d.h
+++ b/mace/kernels/conv_2d.h
@@ -228,11 +228,12 @@ struct Conv2dFunctor : Conv2dFunctorBase {
     MACE_CHECK_NOTNULL(output);
 
     std::vector<index_t> output_shape(4);
-    if (paddings_.empty()) {
-      paddings_.resize(2);
-      kernels::CalcNHWCPaddingAndOutputSize(
-          input->shape().data(), filter->shape().data(), dilations_, strides_,
-          padding_type_, output_shape.data(), paddings_.data());
+    std::vector<int> paddings(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input->shape().data(), filter->shape().data(), dilations_, strides_,
+        padding_type_, output_shape.data(), paddings.data());
+    if (!paddings_.empty()) {
+      paddings = paddings_;
     }
     output->Resize(output_shape);
@@ -260,13 +261,13 @@ struct Conv2dFunctor : Conv2dFunctorBase {
     MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
 
-    int padded_height = input_height + paddings_[0];
-    int padded_width = input_width + paddings_[1];
+    int padded_height = input_height + paddings[0];
+    int padded_width = input_width + paddings[1];
 
     Tensor padded_input;  // Keep this alive during kernel execution
-    if (paddings_[0] > 0 || paddings_[1] > 0) {
-      ConstructNHWCInputWithPadding(input, paddings_.data(), &padded_input);
+    if (paddings[0] > 0 || paddings[1] > 0) {
+      ConstructNHWCInputWithPadding(input, paddings.data(), &padded_input);
       input = &padded_input;
     }
diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h
index 65e8b500b4b3f8afc24d5a11698d060fed510709..c72a4a6d59ff68e5a94a539c6c85c782f4aa9d1f 100644
--- a/mace/kernels/depthwise_conv2d.h
+++ b/mace/kernels/depthwise_conv2d.h
@@ -294,11 +294,12 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
     fake_filter_shape[3] = 1;
 
     std::vector<index_t> output_shape(4);
-    if (paddings_.empty()) {
-      paddings_.resize(2);
-      kernels::CalcNHWCPaddingAndOutputSize(
-          input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
-          padding_type_, output_shape.data(), paddings_.data());
+    std::vector<int> paddings(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
+        padding_type_, output_shape.data(), paddings.data());
+    if (!paddings_.empty()) {
+      paddings = paddings_;
     }
     auto input_shape = fake_filter_shape;
     output->Resize(output_shape);
@@ -329,10 +330,10 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
     MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
 
     // The left-upper most offset of the padded input
-    int paddings_top = paddings_[0] / 2;
-    int paddings_bottom = paddings_[0] - paddings_top;
-    int paddings_left = paddings_[1] / 2;
-    int paddings_right = paddings_[1] - paddings_left;
+    int paddings_top = paddings[0] / 2;
+    int paddings_bottom = paddings[0] - paddings_top;
+    int paddings_left = paddings[1] / 2;
+    int paddings_right = paddings[1] - paddings_left;
 
     int padded_h_start = 0 - paddings_top;
     int padded_w_start = 0 - paddings_left;
diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc
index 48e775e1ad3476d6f7b5adcaadb905db1d6d4e38..8c0733b341697b965f7b804c625d035b51dec6f4 100644
--- a/mace/kernels/opencl/conv_2d_opencl.cc
+++ b/mace/kernels/opencl/conv_2d_opencl.cc
@@ -68,7 +68,8 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
   index_t kernel_h = filter->dim(0);
   index_t kernel_w = filter->dim(1);
-  if (!input->is_image() || strides_[0] != strides_[1] || strides_[0] > 2 ||
+  if (!input->is_image() || strides_[0] != strides_[1] ||
+      ((kernel_h == 1 || kernel_h == 3) && strides_[0] > 2) ||
       (dilations_[0] > 1 && (strides_[0] > 1 || kernel_h == 1))) {
     LOG(WARNING) << "OpenCL conv2d kernel with "
                  << "filter" << kernel_h << "x" << kernel_w << ","
@@ -80,11 +81,12 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
   }
 
   std::vector<index_t> output_shape(4);
-  if (paddings_.empty()) {
-    paddings_.resize(2);
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input->shape().data(), filter->shape().data(), dilations_, strides_,
-        padding_type_, output_shape.data(), paddings_.data());
+  std::vector<int> paddings(2);
+  kernels::CalcNHWCPaddingAndOutputSize(
+      input->shape().data(), filter->shape().data(), dilations_, strides_,
+      padding_type_, output_shape.data(), paddings.data());
+  if (!paddings_.empty()) {
+    paddings = paddings_;
   }
 
   std::vector<size_t> output_image_shape;
@@ -95,11 +97,11 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
       selector[kernel_h - 1] != nullptr &&
       0 < strides_[0] && strides_[0] < 3) {
     auto conv2d_func = selector[kernel_h - 1];
-    conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings_.data(), dilations_, activation_,
+    conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_,
                 relux_max_limit_, prelu_alpha_, DataTypeToEnum<T>::value, output, future);
   } else {
-    Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings_.data(), dilations_,
+    Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_,
                  activation_, relux_max_limit_, prelu_alpha_, DataTypeToEnum<T>::value,
                  output, future);
   }
diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc
index 440b18b43cbf2060bcefd176e9d647b5bb66b2a7..67304bd896bd5e5df14c273c2a839dccfea28390 100644
--- a/mace/kernels/opencl/depthwise_conv_opencl.cc
+++ b/mace/kernels/opencl/depthwise_conv_opencl.cc
@@ -153,18 +153,19 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
   fake_filter_shape[3] = 1;
 
   std::vector<index_t> output_shape(4);
-  if (paddings_.empty()) {
-    paddings_.resize(2);
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
-        padding_type_, output_shape.data(), paddings_.data());
+  std::vector<int> paddings(2);
+  kernels::CalcNHWCPaddingAndOutputSize(
+      input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
+      padding_type_, output_shape.data(), paddings.data());
+  if (!paddings_.empty()) {
+    paddings = paddings_;
   }
 
   std::vector<size_t> output_image_shape;
   CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape);
   output->ResizeImage(output_shape, output_image_shape);
 
-  DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings_.data(), dilations_,
+  DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_,
                   activation_, relux_max_limit_, prelu_alpha_, DataTypeToEnum<T>::value,
                   output, future);
 }
diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc
index 39bb994ef8ed0f6b27740f8d5d89ac75da57e2bd..9b612e48a558599751b7bde26df063689ea54c6a 100644
--- a/mace/kernels/opencl/pooling_opencl.cc
+++ b/mace/kernels/opencl/pooling_opencl.cc
@@ -23,12 +23,13 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
       input->dim(3), input->dim(3)
   };
 
-  if (paddings_.empty()) {
-    paddings_.resize(2);
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input->shape().data(), filter_shape.data(),
-        dilations_, strides_, this->padding_type_,
-        output_shape.data(), paddings_.data());
+  std::vector<int> paddings(2);
+  kernels::CalcNHWCPaddingAndOutputSize(
+      input->shape().data(), filter_shape.data(),
+      dilations_, strides_, this->padding_type_,
+      output_shape.data(), paddings.data());
+  if (!paddings_.empty()) {
+    paddings = paddings_;
   }
 
   std::vector<size_t> output_image_shape;
@@ -66,8 +67,8 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_.setArg(idx++, static_cast(input->dim(1)));
     kernel_.setArg(idx++, static_cast(input->dim(2)));
     kernel_.setArg(idx++, static_cast(out_height));
-    kernel_.setArg(idx++, paddings_[0] / 2);
-    kernel_.setArg(idx++, paddings_[1] / 2);
+    kernel_.setArg(idx++, paddings[0] / 2);
+    kernel_.setArg(idx++, paddings[1] / 2);
     kernel_.setArg(idx++, strides_[0]);
     kernel_.setArg(idx++, kernels_[0]);
     kernel_.setArg(idx++, *(static_cast(output->buffer())));
diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc
index 0396af97fe3c4571a8906c1dd5f97c7004436357..31ca09f482d75999eb79aa59fca27f5bd0e9929d 100644
--- a/mace/kernels/opencl/winograd_transform.cc
+++ b/mace/kernels/opencl/winograd_transform.cc
@@ -17,11 +17,12 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
     StatsFuture *future) {
   std::vector<index_t> output_shape(4);
   std::vector<index_t> filter_shape = {3, 3, input_tensor->dim(3), 1};
-  if (paddings_.empty()) {
-    paddings_.resize(2);
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input_tensor->shape().data(), filter_shape.data(), dilations_.data(),
-        strides_.data(), padding_type_, output_shape.data(), paddings_.data());
+  std::vector<int> paddings(2);
+  kernels::CalcNHWCPaddingAndOutputSize(
+      input_tensor->shape().data(), filter_shape.data(), dilations_.data(),
+      strides_.data(), padding_type_, output_shape.data(), paddings.data());
+  if (!paddings_.empty()) {
+    paddings = paddings_;
   }
 
   const index_t round_h = (output_shape[1] + 1) / 2;
@@ -52,8 +53,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
     kernel_.setArg(idx++, static_cast(input_tensor->dim(3)));
     kernel_.setArg(idx++, static_cast(round_h * round_w));
     kernel_.setArg(idx++, static_cast(round_w));
-    kernel_.setArg(idx++, static_cast(paddings_[0] / 2));
-    kernel_.setArg(idx++, static_cast(paddings_[1] / 2));
+    kernel_.setArg(idx++, static_cast(paddings[0] / 2));
+    kernel_.setArg(idx++, static_cast(paddings[1] / 2));
   }
 
   const uint32_t gws[2] = {static_cast<uint32_t>(out_width),
diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h
index 4c62ba4e8fac7a73d85acaad3ddf8f83e36de592..24455b924c22a2b34cf304d74f58df23ed1b7674 100644
--- a/mace/kernels/pooling.h
+++ b/mace/kernels/pooling.h
@@ -64,13 +64,13 @@ struct PoolingFunctor : PoolingFunctorBase {
         input_tensor->dim(3), input_tensor->dim(3)
     };
-    if (paddings_.empty()) {
-      paddings_.resize(2);
-      kernels::CalcNHWCPaddingAndOutputSize(
-          input_tensor->shape().data(), filter_shape.data(),
-          dilations_, strides_, this->padding_type_,
-          output_shape.data(), paddings_.data());
-
+    std::vector<int> paddings(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input_tensor->shape().data(), filter_shape.data(),
+        dilations_, strides_, this->padding_type_,
+        output_shape.data(), paddings.data());
+    if (!paddings_.empty()) {
+      paddings = paddings_;
     }
     output_tensor->Resize(output_shape);
@@ -99,8 +99,8 @@ struct PoolingFunctor : PoolingFunctorBase {
     int dilation_w = dilations_[1];
 
     // The left-upper most offset of the padded input
-    int padded_h_start = 0 - paddings_[0] / 2;
-    int padded_w_start = 0 - paddings_[1] / 2;
+    int padded_h_start = 0 - paddings[0] / 2;
+    int padded_w_start = 0 - paddings[1] / 2;
 
     if (pooling_type_ == MAX) {
 #pragma omp parallel for collapse(4)
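
Every hunk above makes the same change: instead of lazily resizing and writing into the paddings_ member, each functor now always computes default paddings into a per-call local vector and only overrides that local with paddings_ when the user supplied explicit values, so the member is never mutated. The standalone sketch below illustrates only that control flow under simplified assumptions; CalcDefaultPaddings and ResolvePaddings are hypothetical stand-ins for illustration, not the signature of MACE's kernels::CalcNHWCPaddingAndOutputSize.

#include <iostream>
#include <vector>

// Hypothetical stand-in for the default padding calculation: returns the total
// (height, width) padding a SAME-style kernel needs at stride 1. The real MACE
// helper also takes the input shape, strides, dilations and padding type; this
// sketch keeps only the shape of the control flow.
std::vector<int> CalcDefaultPaddings(int kernel_h, int kernel_w) {
  return {kernel_h - 1, kernel_w - 1};
}

// Mirrors the refactored functors: defaults are always computed into a local
// vector, and user-specified paddings (if any) simply replace them, so the
// member-like input is never modified and repeated calls stay deterministic.
std::vector<int> ResolvePaddings(const std::vector<int> &user_paddings,
                                 int kernel_h, int kernel_w) {
  std::vector<int> paddings = CalcDefaultPaddings(kernel_h, kernel_w);
  if (!user_paddings.empty()) {
    paddings = user_paddings;
  }
  return paddings;
}

int main() {
  // No explicit paddings: fall back to the computed defaults (2, 2 for a 3x3 kernel).
  std::vector<int> computed = ResolvePaddings({}, 3, 3);
  // Explicit paddings: the computed defaults are discarded.
  std::vector<int> forced = ResolvePaddings({4, 4}, 3, 3);
  std::cout << computed[0] << "," << computed[1] << "  "
            << forced[0] << "," << forced[1] << "\n";  // prints "2,2  4,4"
  return 0;
}

The old code wrote the computed values back into paddings_ only when it was empty, so the member served as both an input and a cache across calls; the diff keeps paddings_ as pure input and moves the computed values into the per-call local introduced by each "+ std::vector<int> paddings(2);" line.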