Commit c5bc6a5a authored by liuqi

Convolution: support arbitrary padding values.

Parent 123f4938
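This commit lets convolution, depthwise convolution, pooling, and the Winograd transform accept explicit padding values through a repeated `padding_values` argument (total `[pad_h, pad_w]`), alongside the existing `Padding` enum (`VALID`/`SAME`), which is renamed from `padding_`/`paddings_` to `padding_type_`. Each functor gains a `std::vector<int> paddings_` member; when it is empty, padding is still computed from the padding type via `kernels::CalcNHWCPaddingAndOutputSize`, so existing models are unaffected. A standalone sketch of the fallback semantics (the `TotalPadding` helper is illustrative, not MACE code; it assumes TensorFlow-style SAME padding, which is what the NHWC helper is believed to compute):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

enum Padding { VALID, SAME };

// Hypothetical stand-in for kernels::CalcNHWCPaddingAndOutputSize, reduced
// to one spatial dimension: returns the total padding SAME/VALID needs.
int TotalPadding(Padding type, int in, int filter, int stride, int dilation) {
  if (type == VALID) return 0;
  int k = (filter - 1) * dilation + 1;   // effective (dilated) filter extent
  int out = (in + stride - 1) / stride;  // SAME output size: ceil(in / stride)
  return std::max(0, (out - 1) * stride + k - in);
}

int main() {
  std::vector<int> paddings;  // empty <=> no "padding_values" argument given
  if (paddings.empty()) {     // the fallback this commit adds everywhere
    paddings = {TotalPadding(SAME, 7, 3, 1, 1),   // total pad_h -> 2
                TotalPadding(SAME, 7, 3, 1, 1)};  // total pad_w -> 2
  }
  std::cout << "pad_h=" << paddings[0] << ", pad_w=" << paddings[1] << "\n";
}
```

Note that because the fallback fills `paddings_` in place, the computed values are cached after the first call; explicit `padding_values` are simply never overwritten.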
@@ -178,12 +178,14 @@ void Conv2dKernelFunc(const T *input_ptr,  // batch start
 struct Conv2dFunctorBase {
   Conv2dFunctorBase(const int *strides,
-                    const Padding &paddings,
+                    const Padding &padding_type,
+                    const std::vector<int> &paddings,
                     const int *dilations,
                     const ActivationType activation,
                     const float relux_max_limit,
                     const float prelu_alpha)
       : strides_(strides),
+        padding_type_(padding_type),
         paddings_(paddings),
         dilations_(dilations),
         activation_(activation),
@@ -191,7 +193,8 @@ struct Conv2dFunctorBase {
         prelu_alpha_(prelu_alpha) {}

   const int *strides_;          // [stride_h, stride_w]
-  const Padding paddings_;
+  const Padding padding_type_;
+  std::vector<int> paddings_;
   const int *dilations_;        // [dilation_h, dilation_w]
   const ActivationType activation_;
   const float relux_max_limit_;
@@ -201,12 +204,14 @@ struct Conv2dFunctorBase {
 template <DeviceType D, typename T>
 struct Conv2dFunctor : Conv2dFunctorBase {
   Conv2dFunctor(const int *strides,
-                const Padding &paddings,
+                const Padding &padding_type,
+                const std::vector<int> &paddings,
                 const int *dilations,
                 const ActivationType activation,
                 const float relux_max_limit,
                 const float prelu_alpha)
       : Conv2dFunctorBase(strides,
+                          padding_type,
                           paddings,
                           dilations,
                           activation,
@@ -223,10 +228,12 @@ struct Conv2dFunctor : Conv2dFunctorBase {
     MACE_CHECK_NOTNULL(output);

     std::vector<index_t> output_shape(4);
-    std::vector<int> paddings(2);
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input->shape().data(), filter->shape().data(), dilations_, strides_,
-        paddings_, output_shape.data(), paddings.data());
+    if (paddings_.empty()) {
+      paddings_.resize(2);
+      kernels::CalcNHWCPaddingAndOutputSize(
+          input->shape().data(), filter->shape().data(), dilations_, strides_,
+          padding_type_, output_shape.data(), paddings_.data());
+    }
     output->Resize(output_shape);

     int batch = output->dim(0);
@@ -253,13 +260,13 @@ struct Conv2dFunctor : Conv2dFunctorBase {
     MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");

-    int padded_height = input_height + paddings[0];
-    int padded_width = input_width + paddings[1];
+    int padded_height = input_height + paddings_[0];
+    int padded_width = input_width + paddings_[1];

     Tensor padded_input;
     // Keep this alive during kernel execution
-    if (paddings[0] > 0 || paddings[1] > 0) {
-      ConstructNHWCInputWithPadding(input, paddings.data(), &padded_input);
+    if (paddings_[0] > 0 || paddings_[1] > 0) {
+      ConstructNHWCInputWithPadding(input, paddings_.data(), &padded_input);
       input = &padded_input;
     }
@@ -625,12 +632,14 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
 template <typename T>
 struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
   Conv2dFunctor(const int *strides,
-                const Padding &paddings,
+                const Padding &padding_type,
+                const std::vector<int> &paddings,
                 const int *dilations,
                 const ActivationType activation,
                 const float relux_max_limit,
                 const float prelu_alpha)
       : Conv2dFunctorBase(strides,
+                          padding_type,
                           paddings,
                           dilations,
                           activation,
......
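Both construction modes of the updated `Conv2dFunctor`, as a hedged sketch (assumes the MACE kernel headers and enums are in scope; the `{2, 2}` totals are arbitrary example values):

```cpp
int strides[2] = {1, 1};
int dilations[2] = {1, 1};

// Explicit totals from "padding_values": the SAME type passed alongside is
// never consulted, because paddings_ starts out non-empty.
kernels::Conv2dFunctor<DeviceType::CPU, float> explicit_pad(
    strides, SAME, std::vector<int>{2, 2}, dilations,
    kernels::ActivationType::NOOP, 0.0f, 0.0f);

// Empty vector: padding is computed from SAME on the first operator() call,
// i.e. the pre-commit behaviour.
kernels::Conv2dFunctor<DeviceType::CPU, float> computed_pad(
    strides, SAME, std::vector<int>{}, dilations,
    kernels::ActivationType::NOOP, 0.0f, 0.0f);
```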
@@ -237,20 +237,23 @@ void DepthwiseConv2dNoOOBCheckKernel(const T *input_ptr,
 struct DepthwiseConv2dFunctorBase {
   DepthwiseConv2dFunctorBase(const int *strides,
-                             const Padding padding,
+                             const Padding padding_type,
+                             const std::vector<int> &paddings,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
                              const float prelu_alpha)
       : strides_(strides),
-        padding_(padding),
+        padding_type_(padding_type),
+        paddings_(paddings),
         dilations_(dilations),
         activation_(activation),
         relux_max_limit_(relux_max_limit),
         prelu_alpha_(prelu_alpha) {}

   const int *strides_;          // [stride_h, stride_w]
-  const Padding padding_;
+  const Padding padding_type_;
+  std::vector<int> paddings_;
   const int *dilations_;        // [dilation_h, dilation_w]
   const ActivationType activation_;
   const float relux_max_limit_;
@@ -260,13 +263,15 @@ struct DepthwiseConv2dFunctorBase {
 template <DeviceType D, typename T>
 struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
   DepthwiseConv2dFunctor(const int *strides,
-                         const Padding padding,
+                         const Padding padding_type,
+                         const std::vector<int> &paddings,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
                          const float prelu_alpha)
       : DepthwiseConv2dFunctorBase(strides,
-                                   padding,
+                                   padding_type,
+                                   paddings,
                                    dilations,
                                    activation,
                                    relux_max_limit,
@@ -289,10 +294,12 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
     fake_filter_shape[3] = 1;

     std::vector<index_t> output_shape(4);
-    std::vector<int> paddings(2);
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
-        padding_, output_shape.data(), paddings.data());
+    if (paddings_.empty()) {
+      paddings_.resize(2);
+      kernels::CalcNHWCPaddingAndOutputSize(
+          input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
+          padding_type_, output_shape.data(), paddings_.data());
+    }
     auto input_shape = fake_filter_shape;
     output->Resize(output_shape);
@@ -322,10 +329,10 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
     MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");

     // The left-upper most offset of the padded input
-    int paddings_top = paddings[0] / 2;
-    int paddings_bottom = paddings[0] - paddings_top;
-    int paddings_left = paddings[1] / 2;
-    int paddings_right = paddings[1] - paddings_left;
+    int paddings_top = paddings_[0] / 2;
+    int paddings_bottom = paddings_[0] - paddings_top;
+    int paddings_left = paddings_[1] / 2;
+    int paddings_right = paddings_[1] - paddings_left;

     int padded_h_start = 0 - paddings_top;
     int padded_w_start = 0 - paddings_left;
@@ -413,13 +420,15 @@ template <typename T>
 struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
     : DepthwiseConv2dFunctorBase {
   DepthwiseConv2dFunctor(const int *strides,
-                         const Padding padding,
+                         const Padding padding_type,
+                         const std::vector<int> &paddings,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
                          const float prelu_alpha)
       : DepthwiseConv2dFunctorBase(strides,
-                                   padding,
+                                   padding_type,
+                                   paddings,
                                    dilations,
                                    activation,
                                    relux_max_limit,
......
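The CPU depthwise kernel above also fixes how a total is split across the two sides: top/left take the floored half, bottom/right the remainder, so an odd explicit total pads one extra row at the bottom (and one extra column on the right). Plain arithmetic, mirroring the hunk:

```cpp
int total = 3;             // e.g. padding_values = {3, 3}
int top = total / 2;       // 1
int bottom = total - top;  // 2: the odd row lands on the bottom
```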
@@ -80,10 +80,12 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
   }

   std::vector<index_t> output_shape(4);
-  std::vector<int> paddings(2);
-  kernels::CalcNHWCPaddingAndOutputSize(
-      input->shape().data(), filter->shape().data(), dilations_, strides_,
-      paddings_, output_shape.data(), paddings.data());
+  if (paddings_.empty()) {
+    paddings_.resize(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input->shape().data(), filter->shape().data(), dilations_, strides_,
+        padding_type_, output_shape.data(), paddings_.data());
+  }

   std::vector<size_t> output_image_shape;
   CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape);
@@ -93,11 +95,11 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
       selector[kernel_h - 1] != nullptr &&
       0 < strides_[0] && strides_[0] < 3) {
     auto conv2d_func = selector[kernel_h - 1];
-    conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_,
+    conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings_.data(), dilations_, activation_,
                 relux_max_limit_, prelu_alpha_, DataTypeToEnum<T>::value,
                 output, future);
   } else {
-    Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_,
+    Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings_.data(), dilations_,
                  activation_, relux_max_limit_, prelu_alpha_,
                  DataTypeToEnum<T>::value, output, future);
   }
......
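The OpenCL convolution picks a tuned kernel from a function-pointer table indexed by filter height (only for stride 1 or 2), otherwise falling back to the generic `Conv2dOpencl`; both paths now consume the member `paddings_`. A self-contained sketch of that selector pattern (names are illustrative, not MACE's):

```cpp
#include <cstdio>

// An array of kernel functions indexed by filter size; nullptr means
// "no tuned kernel for this size".
using KernelFn = void (*)(int pad_h, int pad_w);

void K1x1(int ph, int pw) { std::printf("1x1 ph=%d pw=%d\n", ph, pw); }
void K3x3(int ph, int pw) { std::printf("3x3 ph=%d pw=%d\n", ph, pw); }
void Generic(int ph, int pw) { std::printf("generic ph=%d pw=%d\n", ph, pw); }

int main() {
  KernelFn selector[5] = {K1x1, nullptr, K3x3, nullptr, nullptr};
  int kernel_h = 3, stride = 1;
  int paddings[2] = {2, 2};  // now taken from the functor's paddings_ member
  if (kernel_h <= 5 && selector[kernel_h - 1] != nullptr &&
      0 < stride && stride < 3) {
    selector[kernel_h - 1](paddings[0], paddings[1]);  // tuned path
  } else {
    Generic(paddings[0], paddings[1]);                 // fallback
  }
}
```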
@@ -140,7 +140,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
         << " is not implemented yet, using slow version";
     // TODO(heliangliang) The CPU/NEON kernel should map the buffer
     DepthwiseConv2dFunctor<DeviceType::CPU, float>(
-        strides_, padding_, dilations_, activation_, relux_max_limit_,
+        strides_, padding_type_, paddings_, dilations_, activation_, relux_max_limit_,
         prelu_alpha_)(input, filter, bias, output, future);
     return;
   }
@@ -153,16 +153,18 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
   fake_filter_shape[3] = 1;

   std::vector<index_t> output_shape(4);
-  std::vector<int> paddings(2);
-  kernels::CalcNHWCPaddingAndOutputSize(
-      input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
-      padding_, output_shape.data(), paddings.data());
+  if (paddings_.empty()) {
+    paddings_.resize(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input->shape().data(), fake_filter_shape.data(), dilations_, strides_,
+        padding_type_, output_shape.data(), paddings_.data());
+  }

   std::vector<size_t> output_image_shape;
   CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape);
   output->ResizeImage(output_shape, output_image_shape);

-  DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_,
+  DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings_.data(), dilations_,
                   activation_, relux_max_limit_, prelu_alpha_,
                   DataTypeToEnum<T>::value, output, future);
 }
......
@@ -18,16 +18,18 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
   MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1)
       << "Pooling opencl kernel not support dilation yet";
   std::vector<index_t> output_shape(4);
-  std::vector<int> paddings(2);
   std::vector<index_t> filter_shape = {
       kernels_[0], kernels_[1],
       input->dim(3), input->dim(3)
   };
-  kernels::CalcNHWCPaddingAndOutputSize(
-      input->shape().data(), filter_shape.data(),
-      dilations_, strides_, this->padding_,
-      output_shape.data(), paddings.data());
+  if (paddings_.empty()) {
+    paddings_.resize(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input->shape().data(), filter_shape.data(),
+        dilations_, strides_, this->padding_type_,
+        output_shape.data(), paddings_.data());
+  }

   std::vector<size_t> output_image_shape;
   CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape);
@@ -64,8 +66,8 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
     kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
     kernel_.setArg(idx++, static_cast<int32_t>(out_height));
-    kernel_.setArg(idx++, paddings[0] / 2);
-    kernel_.setArg(idx++, paddings[1] / 2);
+    kernel_.setArg(idx++, paddings_[0] / 2);
+    kernel_.setArg(idx++, paddings_[1] / 2);
     kernel_.setArg(idx++, strides_[0]);
     kernel_.setArg(idx++, kernels_[0]);
     kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
......
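The pooling kernel is only handed the top-left offsets, computed with flooring division; the implied split of an explicit total therefore matches the depthwise split above (extra padding falls on the bottom/right):

```cpp
// What the two setArg calls pass for an explicit total of {3, 3}:
int pad_top = 3 / 2;   // 1 row above the input
int pad_left = 3 / 2;  // 1 column left of the input
```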
@@ -17,10 +17,12 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input_tensor,
                                                                  StatsFuture *future) {
   std::vector<index_t> output_shape(4);
   std::vector<index_t> filter_shape = {3, 3, input_tensor->dim(3), 1};
-  std::vector<int> paddings(2);
-  kernels::CalcNHWCPaddingAndOutputSize(
-      input_tensor->shape().data(), filter_shape.data(), dilations_.data(),
-      strides_.data(), paddings_, output_shape.data(), paddings.data());
+  if (paddings_.empty()) {
+    paddings_.resize(2);
+    kernels::CalcNHWCPaddingAndOutputSize(
+        input_tensor->shape().data(), filter_shape.data(), dilations_.data(),
+        strides_.data(), padding_type_, output_shape.data(), paddings_.data());
+  }

   const index_t round_h = (output_shape[1] + 1) / 2;
   const index_t round_w = (output_shape[2] + 1) / 2;
@@ -50,8 +52,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input_tensor,
     kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(3)));
     kernel_.setArg(idx++, static_cast<uint32_t>(round_h * round_w));
     kernel_.setArg(idx++, static_cast<uint32_t>(round_w));
-    kernel_.setArg(idx++, static_cast<uint32_t>(paddings[0] / 2));
-    kernel_.setArg(idx++, static_cast<uint32_t>(paddings[1] / 2));
+    kernel_.setArg(idx++, static_cast<uint32_t>(paddings_[0] / 2));
+    kernel_.setArg(idx++, static_cast<uint32_t>(paddings_[1] / 2));
   }

   const uint32_t gws[2] = {static_cast<uint32_t>(out_width),
......
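`round_h` and `round_w` use `(n + 1) / 2`, the integer ceiling of `n / 2`; combined with the hard-coded 3x3 `filter_shape`, this suggests an F(2x2, 3x3) Winograd transform, where each of the `round_h * round_w` tiles produces a 2x2 output patch. Quick check of the rounding:

```cpp
// (n + 1) / 2 == ceil(n / 2.0) for non-negative integers.
const int out_h = 7, out_w = 5;
const int round_h = (out_h + 1) / 2;  // 4 tiles cover 8 >= 7 output rows
const int round_w = (out_w + 1) / 2;  // 3 tiles cover 6 >= 5 output columns
```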
@@ -24,18 +24,21 @@ struct PoolingFunctorBase {
   PoolingFunctorBase(const PoolingType pooling_type,
                      const int *kernels,
                      const int *strides,
-                     const Padding padding,
+                     const Padding padding_type,
+                     const std::vector<int> &paddings,
                      const int *dilations)
       : pooling_type_(pooling_type),
         kernels_(kernels),
         strides_(strides),
-        padding_(padding),
+        padding_type_(padding_type),
+        paddings_(paddings),
         dilations_(dilations) {}

   const PoolingType pooling_type_;
   const int *kernels_;
   const int *strides_;
-  const Padding padding_;
+  const Padding padding_type_;
+  std::vector<int> paddings_;
   const int *dilations_;
 };
@@ -44,27 +47,31 @@ struct PoolingFunctor : PoolingFunctorBase {
   PoolingFunctor(const PoolingType pooling_type,
                  const int *kernels,
                  const int *strides,
-                 const Padding padding,
+                 const Padding padding_type,
+                 const std::vector<int> &paddings,
                  const int *dilations)
       : PoolingFunctorBase(pooling_type, kernels,
-                           strides, padding,
-                           dilations) {}
+                           strides, padding_type,
+                           paddings, dilations) {}

   void operator()(const Tensor *input_tensor,
                   Tensor *output_tensor,
                   StatsFuture *future) {
     std::vector<index_t> output_shape(4);
-    std::vector<int> paddings(2);
     std::vector<index_t> filter_shape = {
         kernels_[0], kernels_[1],
         input_tensor->dim(3), input_tensor->dim(3)
     };
-    kernels::CalcNHWCPaddingAndOutputSize(
-        input_tensor->shape().data(), filter_shape.data(),
-        dilations_, strides_, this->padding_,
-        output_shape.data(), paddings.data());
+    if (paddings_.empty()) {
+      paddings_.resize(2);
+      kernels::CalcNHWCPaddingAndOutputSize(
+          input_tensor->shape().data(), filter_shape.data(),
+          dilations_, strides_, this->padding_type_,
+          output_shape.data(), paddings_.data());
+    }
     output_tensor->Resize(output_shape);

     Tensor::MappingGuard in_guard(input_tensor);
@@ -92,8 +99,8 @@ struct PoolingFunctor : PoolingFunctorBase {
     int dilation_w = dilations_[1];

     // The left-upper most offset of the padded input
-    int padded_h_start = 0 - paddings[0] / 2;
-    int padded_w_start = 0 - paddings[1] / 2;
+    int padded_h_start = 0 - paddings_[0] / 2;
+    int padded_w_start = 0 - paddings_[1] / 2;

     if (pooling_type_ == MAX) {
 #pragma omp parallel for collapse(4)
@@ -163,11 +170,12 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
   PoolingFunctor(const PoolingType pooling_type,
                  const int *kernels,
                  const int *strides,
-                 const Padding padding,
+                 const Padding padding_type,
+                 const std::vector<int> &paddings,
                  const int *dilations)
       : PoolingFunctorBase(pooling_type, kernels,
-                           strides, padding,
-                           dilations) {}
+                           strides, padding_type,
+                           paddings, dilations) {}
   void operator()(const Tensor *input_tensor,
                   Tensor *output_tensor,
                   StatsFuture *future);
......
@@ -15,18 +15,22 @@ namespace mace {
 namespace kernels {

 struct WinogradTransformFunctorBase {
-  WinogradTransformFunctorBase(const Padding &paddings)
-      : strides_({1, 1}), dilations_({1, 1}), paddings_(paddings) {}
+  WinogradTransformFunctorBase(const Padding &padding_type,
+                               const std::vector<int> &paddings)
+      : strides_({1, 1}), dilations_({1, 1}),
+        padding_type_(padding_type), paddings_(paddings) {}

   const std::vector<int> strides_;    // [stride_h, stride_w]
   const std::vector<int> dilations_;  // [dilation_h, dilation_w]
-  Padding paddings_;
+  Padding padding_type_;
+  std::vector<int> paddings_;
 };

 template<DeviceType D, typename T>
 struct WinogradTransformFunctor : WinogradTransformFunctorBase {
-  WinogradTransformFunctor(const Padding &paddings)
-      : WinogradTransformFunctorBase(paddings) {}
+  WinogradTransformFunctor(const Padding &padding_type,
+                           const std::vector<int> &paddings)
+      : WinogradTransformFunctorBase(padding_type, paddings) {}

   void operator()(const Tensor *input,
                   Tensor *output,
@@ -38,8 +42,9 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase {
 template<typename T>
 struct WinogradTransformFunctor<DeviceType::OPENCL, T> : WinogradTransformFunctorBase {
-  WinogradTransformFunctor(const Padding &paddings)
-      : WinogradTransformFunctorBase(paddings) {}
+  WinogradTransformFunctor(const Padding &padding_type,
+                           const std::vector<int> &paddings)
+      : WinogradTransformFunctorBase(padding_type, paddings) {}

   void operator()(const Tensor *input,
                   Tensor *output,
......
@@ -26,7 +26,12 @@ class AddNOp : public Operator<D, T> {
     for (int i = 1; i < n; ++i) {
       inputs[i] = this->Input(i);
       MACE_CHECK(inputs[0]->dim_size() == inputs[i]->dim_size());
-      MACE_CHECK(inputs[0]->size() == inputs[i]->size());
+      MACE_CHECK(inputs[0]->size() == inputs[i]->size()) << "Input 0: "
+          << MakeString(inputs[0]->shape())
+          << ", size: " << inputs[0]->size()
+          << ". Input " << i << ": "
+          << MakeString(inputs[i]->shape())
+          << ", size: " << inputs[i]->size();
     }

     functor_(inputs, output_tensor, future);
......
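A small unrelated improvement rides along: the AddN size check now reports both operands. With hypothetical inputs of shape {1, 4, 4, 8} (size 128) and {1, 4, 4, 16} (size 256), and assuming `MakeString` renders shapes as bracketed lists, the failure message would read roughly `Input 0: [1, 4, 4, 8], size: 128. Input 1: [1, 4, 4, 16], size: 256`.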
@@ -19,7 +19,8 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
   Conv2dOp(const OperatorDef &op_def, Workspace *ws)
       : ConvPool2dOpBase<D, T>(op_def, ws),
         functor_(this->strides_.data(),
-                 this->padding_,
+                 this->padding_type_,
+                 this->paddings_,
                  this->dilations_.data(),
                  kernels::ActivationType::NOOP,
                  0.0f,
......
@@ -16,14 +16,16 @@ class ConvPool2dOpBase : public Operator<D, T> {
   ConvPool2dOpBase(const OperatorDef &op_def, Workspace *ws)
       : Operator<D, T>(op_def, ws),
         strides_(OperatorBase::GetRepeatedArgument<int>("strides")),
-        padding_(static_cast<Padding>(OperatorBase::GetSingleArgument<int>(
+        padding_type_(static_cast<Padding>(OperatorBase::GetSingleArgument<int>(
             "padding", static_cast<int>(SAME)))),
+        paddings_(OperatorBase::GetRepeatedArgument<int>("padding_values")),
         dilations_(
             OperatorBase::GetRepeatedArgument<int>("dilations", {1, 1})) {}

 protected:
   std::vector<int> strides_;
-  Padding padding_;
+  Padding padding_type_;
+  std::vector<int> paddings_;
   std::vector<int> dilations_;
 };
......
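`ConvPool2dOpBase` is where the new argument enters from the graph: every conv/pool op now reads both keys. A summary of the precedence, as a comment sketch (the key names are verbatim from the hunk above; the proto plumbing behind `GetRepeatedArgument`/`GetSingleArgument` is assumed to behave as the names suggest):

```cpp
// strides_      <- repeated int "strides"
// padding_type_ <- single   int "padding"         (default SAME)
// paddings_     <- repeated int "padding_values"  (default: empty vector)
// dilations_    <- repeated int "dilations"       (default {1, 1})
//
// A graph converter that wants exact source-framework padding can emit
// "padding_values" directly and leave "padding" at its default; omitting
// "padding_values" restores the old computed-padding behaviour.
```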
@@ -20,7 +20,8 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase<D, T> {
   DepthwiseConv2dOp(const OperatorDef &op_def, Workspace *ws)
       : ConvPool2dOpBase<D, T>(op_def, ws),
         functor_(this->strides_.data(),
-                 this->padding_,
+                 this->padding_type_,
+                 this->paddings_,
                  this->dilations_.data(),
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
......
@@ -19,7 +19,8 @@ class FusedConv2dOp : public ConvPool2dOpBase<D, T> {
   FusedConv2dOp(const OperatorDef &op_def, Workspace *ws)
       : ConvPool2dOpBase<D, T>(op_def, ws),
         functor_(this->strides_.data(),
-                 this->padding_,
+                 this->padding_type_,
+                 this->paddings_,
                  this->dilations_.data(),
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
......
@@ -21,7 +21,7 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
             static_cast<PoolingType>(OperatorBase::GetSingleArgument<int>(
                 "pooling_type", static_cast<int>(AVG)))),
         functor_(pooling_type_, kernels_.data(), this->strides_.data(),
-                 this->padding_, this->dilations_.data()){};
+                 this->padding_type_, this->paddings_, this->dilations_.data()){};

   bool Run(StatsFuture *future) override {
     const Tensor *input = this->Input(INPUT);
......
@@ -18,7 +18,8 @@ class WinogradTransformOp : public Operator<D, T> {
   WinogradTransformOp(const OperatorDef &op_def, Workspace *ws)
       : Operator<D, T>(op_def, ws),
         functor_(static_cast<Padding>(OperatorBase::GetSingleArgument<int>(
-            "padding", static_cast<int>(VALID)))) {}
+            "padding", static_cast<int>(VALID))),
+            OperatorBase::GetRepeatedArgument<int>("padding_values")) {}

   bool Run(StatsFuture *future) override {
     const Tensor *input_tensor = this->Input(INPUT);
......