diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index b9bf8a983bd4bbafca9e16a52e576bcbf378924c..91e1471ff662aa2e82e46a5103aa5d5214cbca77 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -229,11 +229,14 @@ struct Conv2dFunctor : Conv2dFunctorBase { std::vector output_shape(4); std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input->shape().data(), filter->shape().data(), dilations_, strides_, - padding_type_, output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input->shape().data(), filter->shape().data(), dilations_, strides_, + padding_type_, output_shape.data(), paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input->shape().data(), filter->shape().data(), paddings_.data(), + dilations_, strides_, RoundType::FLOOR, output_shape.data()); } output->Resize(output_shape); diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index fb009d790b3fa079030ed064502131d2ea6eac87..9b7160a7363df6b0883de821e25cf9fbc29ec33c 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -135,6 +135,44 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape, // NHWC output_shape[3] = output_channels; } +void CalcOutputSize(const index_t *input_shape, // NHWC + const index_t *filter_shape, // HWOI + const int *padding_size, + const int *dilations, + const int *strides, + const RoundType round_type, + index_t *output_shape) { + MACE_CHECK(dilations[0] > 0 && dilations[1] > 0, + "Invalid dilations, must >= 1"); + MACE_CHECK((dilations[0] == 1 || strides[0] == 1) && + (dilations[1] == 1 || strides[1] == 1), + "If dilations > 1, strides should be 1"); + MACE_CHECK_NOTNULL(output_shape); + MACE_CHECK_NOTNULL(padding_size); + /* + * Convolution arithmetic: + * o = floor((i + 2 * p - k - (k - 1) * (d - 1)) / s) + 1 + * Pooling arithmetic: + * o = ceil((i + 2 * p - k - (k - 
1) * (d - 1)) / s) + 1 + * For details, see https://arxiv.org/pdf/1603.07285.pdf or + * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html + */ + output_shape[0] = input_shape[0]; + if (round_type == FLOOR) { + output_shape[1] = static_cast(std::floor(1.0 * (input_shape[1] + padding_size[0] + - filter_shape[0] - (filter_shape[0] - 1) * (dilations[0] - 1)) / strides[0]) + 1); + output_shape[2] = static_cast(std::floor(1.0 * (input_shape[2] + padding_size[1] + - filter_shape[1] - (filter_shape[1] - 1) * (dilations[1] - 1)) / strides[1]) + 1); + } else { + output_shape[1] = static_cast(std::ceil(1.0 * (input_shape[1] + padding_size[0] + - filter_shape[0] - (filter_shape[0] - 1) * (dilations[0] - 1)) / strides[0]) + 1); + output_shape[2] = static_cast(std::ceil(1.0 * (input_shape[2] + padding_size[1] + - filter_shape[1] - (filter_shape[1] - 1) * (dilations[1] - 1)) / strides[1]) + 1); + } + output_shape[3] = filter_shape[2]; + +} + void CalPaddingSize(const index_t *input_shape, // NCHW const index_t *filter_shape, // OIHW const int *dilations, diff --git a/mace/kernels/conv_pool_2d_util.h b/mace/kernels/conv_pool_2d_util.h index 87f9546f829c7ef81ba1275de8ee8d1c21ccb77d..ff3ec0d8a220401263caba9706d41141809645af 100644 --- a/mace/kernels/conv_pool_2d_util.h +++ b/mace/kernels/conv_pool_2d_util.h @@ -15,6 +15,11 @@ enum Padding { FULL = 2, // Pads with one less than the filter size on both sides }; +enum RoundType{ + FLOOR = 0, + CEIL = 1, +}; + namespace kernels { void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW @@ -33,6 +38,14 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape, // NCHW index_t *output_shape, int *padding_size); +void CalcOutputSize(const index_t *input_shape, // NHWC + const index_t *filter_shape, // HWOI + const int *padding_size, + const int *dilations, + const int *strides, + const RoundType round_type, + index_t *output_shape); + void CalPaddingSize(const index_t *input_shape, // NCHW const index_t 
*filter_shape, // OIHW const int *dilations, diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index c72a4a6d59ff68e5a94a539c6c85c782f4aa9d1f..ef78e17ee53e328372e7c92cfbb6b8f14edb2ecc 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -295,11 +295,14 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase { std::vector output_shape(4); std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input->shape().data(), fake_filter_shape.data(), dilations_, strides_, - padding_type_, output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input->shape().data(), fake_filter_shape.data(), dilations_, strides_, + padding_type_, output_shape.data(), paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input->shape().data(), fake_filter_shape.data(), paddings_.data(), + dilations_, strides_, RoundType::FLOOR, output_shape.data()); } auto input_shape = fake_filter_shape; output->Resize(output_shape); diff --git a/mace/kernels/opencl/cl/conv_2d_1x1.cl b/mace/kernels/opencl/cl/conv_2d_1x1.cl index 0eecdb19757fddc00b3f9cb4855fd76f835a79b6..ebcbb23177924555b76b89b12111f45cb0f156f3 100644 --- a/mace/kernels/opencl/cl/conv_2d_1x1.cl +++ b/mace/kernels/opencl/cl/conv_2d_1x1.cl @@ -12,7 +12,8 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] __private const int in_width, __private const int in_ch_blks, __private const int height, - __private const int width) { + __private const int width, + __private const int stride) { const int out_ch_blk = get_global_id(0); const int out_w_blk = get_global_id(1); const int out_w_blks = get_global_size(1); @@ -31,19 +32,12 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] #endif int4 w; -#if STRIDE == 1 - w.x = out_w_blk; - w.y = w.x + out_w_blks; - w.z = w.y + out_w_blks; - w.w = w.z + out_w_blks; - 
int out_hb_idx = (out_hb % height); -#elif STRIDE == 2 - w.x = out_w_blk << 1; - w.y = (out_w_blk + out_w_blks) << 1; - w.z = (out_w_blk + (out_w_blks << 1)) << 1; - w.w = (out_w_blk + (out_w_blks << 1) + out_w_blks) << 1; - int out_hb_idx = (out_hb % height) << 1; -#endif + int in_width_stride = mul24(out_w_blks, stride); + w.x = mul24(out_w_blk, stride); + w.y = w.x + in_width_stride; + w.z = w.y + in_width_stride; + w.w = w.z + in_width_stride; + int out_hb_idx = mul24((out_hb % height), stride); w.x = select(w.x, INT_MIN, w.x >= in_width); w.y = select(w.y, INT_MIN, w.y >= in_width); diff --git a/mace/kernels/opencl/cl/conv_2d_3x3.cl b/mace/kernels/opencl/cl/conv_2d_3x3.cl index d37ec7f1e1fc8599c8fd00d9644d0f1251b3d16a..0fce82366e16bdbffab48b73c0038cba02dd48b8 100644 --- a/mace/kernels/opencl/cl/conv_2d_3x3.cl +++ b/mace/kernels/opencl/cl/conv_2d_3x3.cl @@ -13,6 +13,7 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] __private const int in_ch_blks, __private const int out_height, __private const int out_width, + __private const int stride, __private const int padding_top, __private const int padding_left, __private const int dilation_h, @@ -38,21 +39,13 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] DATA_TYPE4 out4 = 0; #endif -#if STRIDE == 1 - int in_width0 = out_w_blk - padding_left; - int in_width1 = in_width0 + out_w_blks; - int in_width2 = in_width1 + out_w_blks; - int in_width3 = in_width2 + out_w_blks; - int in_width4 = in_width3 + out_w_blks; - const int height_idx = (out_hb % out_height) - padding_top; -#elif STRIDE == 2 - int in_width0 = (out_w_blk << 1) - padding_left; - int in_width1 = ((out_w_blk + out_w_blks) << 1) - padding_left; - int in_width2 = ((out_w_blk + (out_w_blks << 1)) << 1) - padding_left; - int in_width3 = ((out_w_blk + (out_w_blks << 1) + out_w_blks) << 1) - padding_left; - int in_width4 = ((out_w_blk + (out_w_blks << 2)) << 1) - padding_left; - const int 
height_idx = ((out_hb % out_height) << 1) - padding_top; -#endif + int in_width_stride = mul24(out_w_blks, stride); + int in_width0 = mad24(out_w_blk, stride, -padding_left); + int in_width1 = in_width0 + in_width_stride; + int in_width2 = in_width1 + in_width_stride; + int in_width3 = in_width2 + in_width_stride; + int in_width4 = in_width3 + in_width_stride; + const int height_idx = mad24((out_hb % out_height), stride, -padding_top); const int batch_idx = mul24((out_hb / out_height), in_height); const int rounded_in_ch_x_3 = (rounded_in_ch << 1) + rounded_in_ch; diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index 8c0733b341697b965f7b804c625d035b51dec6f4..58c833cb206ff110854f34090a08db0c7bb23bdb 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -69,7 +69,6 @@ void Conv2dFunctor::operator()(const Tensor *input, index_t kernel_h = filter->dim(0); index_t kernel_w = filter->dim(1); if (!input->is_image() || strides_[0] != strides_[1] || - ((kernel_h == 1 || kernel_h == 3) && strides_[0] > 2) || (dilations_[0] > 1 && (strides_[0] > 1 || kernel_h == 1))) { LOG(WARNING) << "OpenCL conv2d kernel with " << "filter" << kernel_h << "x" << kernel_w << "," @@ -82,11 +81,14 @@ void Conv2dFunctor::operator()(const Tensor *input, std::vector output_shape(4); std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input->shape().data(), filter->shape().data(), dilations_, strides_, - padding_type_, output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input->shape().data(), filter->shape().data(), dilations_, strides_, + padding_type_, output_shape.data(), paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input->shape().data(), filter->shape().data(), paddings_.data(), + dilations_, strides_, RoundType::FLOOR, output_shape.data()); } std::vector output_image_shape; @@ -94,8 
+96,7 @@ void Conv2dFunctor::operator()(const Tensor *input, output->ResizeImage(output_shape, output_image_shape); if (kernel_h == kernel_w && kernel_h <= 5 && - selector[kernel_h - 1] != nullptr && - 0 < strides_[0] && strides_[0] < 3 ) { + selector[kernel_h - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1]; conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_, relux_max_limit_, prelu_alpha_, DataTypeToEnum::value, diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index aa4bbc6bfa47407339dd67f432471c34867e5110..a8dff64699ea3952d53b7026c6b1a66dc659ccda 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -44,7 +44,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, built_options.emplace("-Dconv_2d_1x1=" + kernel_name); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); - built_options.emplace(MakeString("-DSTRIDE=", stride)); if (bias != nullptr) { built_options.emplace("-DBIAS"); } @@ -93,6 +92,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, kernel->setArg(idx++, static_cast(input_channel_blocks)); kernel->setArg(idx++, static_cast(height)); kernel->setArg(idx++, static_cast(width)); + kernel->setArg(idx++, stride); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index 3a185faf998fed974576fed9aa30587a4a3d0d4b..af2a2bc29d74196a7788c76f3c317c6a9ab897ff 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -42,7 +42,6 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); built_options.emplace(bias != nullptr ? 
"-DBIAS" : ""); - built_options.emplace(MakeString("-DSTRIDE=", stride)); switch (activation) { case NOOP: break; @@ -87,6 +86,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, kernel->setArg(idx++, static_cast(input_channel_blocks)); kernel->setArg(idx++, static_cast(height)); kernel->setArg(idx++, static_cast(width)); + kernel->setArg(idx++, stride); kernel->setArg(idx++, padding[0] / 2); kernel->setArg(idx++, padding[1] / 2); kernel->setArg(idx++, dilations[0]); diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index 30a1a75171bcd6805cfdad1d69233888d3922444..f9dd6d1db5010a8575492ae93357ff9e4c5ce062 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -42,7 +42,6 @@ extern void Conv2dOpencl(cl::Kernel *kernel, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); built_options.emplace(bias != nullptr ? 
"-DBIAS" : ""); - built_options.emplace(MakeString("-DSTRIDE=", stride)); switch (activation) { case NOOP: break; diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 67304bd896bd5e5df14c273c2a839dccfea28390..063220e44404a8e85e056043b6ed8a34a030a9e6 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -154,11 +154,14 @@ void DepthwiseConv2dFunctor::operator()( std::vector output_shape(4); std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input->shape().data(), fake_filter_shape.data(), dilations_, strides_, - padding_type_, output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input->shape().data(), fake_filter_shape.data(), dilations_, strides_, + padding_type_, output_shape.data(), paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input->shape().data(), fake_filter_shape.data(), paddings_.data(), + dilations_, strides_, RoundType::FLOOR, output_shape.data()); } std::vector output_image_shape; diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index 9b612e48a558599751b7bde26df063689ea54c6a..2ec0e0845982ac32cb041203454342411f846e9f 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -24,12 +24,14 @@ void PoolingFunctor::operator()(const Tensor *input, }; std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input->shape().data(), filter_shape.data(), - dilations_, strides_, this->padding_type_, - output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input->shape().data(), filter_shape.data(), dilations_, strides_, + padding_type_, output_shape.data(), paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input->shape().data(), 
filter_shape.data(), paddings_.data(), + dilations_, strides_, RoundType::CEIL, output_shape.data()); } std::vector output_image_shape; diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index 31ca09f482d75999eb79aa59fca27f5bd0e9929d..ab8704a19e77eb8cedaa9ed8989dbeae90f62d66 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -18,11 +18,14 @@ void WinogradTransformFunctor::operator()(const Tensor *i std::vector output_shape(4); std::vector filter_shape = {3, 3, input_tensor->dim(3), 1}; std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input_tensor->shape().data(), filter_shape.data(), dilations_.data(), - strides_.data(), padding_type_, output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input_tensor->shape().data(), filter_shape.data(), dilations_.data(), strides_.data(), + padding_type_, output_shape.data(), paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input_tensor->shape().data(), filter_shape.data(), paddings_.data(), + dilations_.data(), strides_.data(), RoundType::FLOOR, output_shape.data()); } const index_t round_h = (output_shape[1] + 1) / 2; diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index 24455b924c22a2b34cf304d74f58df23ed1b7674..b2b765b5fb7f8514d684fddf5b4a70d91eb02a3e 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -65,12 +65,14 @@ struct PoolingFunctor : PoolingFunctorBase { }; std::vector paddings(2); - kernels::CalcNHWCPaddingAndOutputSize( - input_tensor->shape().data(), filter_shape.data(), - dilations_, strides_, this->padding_type_, - output_shape.data(), paddings.data()); - if (!paddings_.empty()) { + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input_tensor->shape().data(), filter_shape.data(), dilations_, strides_, + padding_type_, output_shape.data(), 
paddings.data()); + } else { paddings = paddings_; + CalcOutputSize(input_tensor->shape().data(), filter_shape.data(), paddings_.data(), + dilations_, strides_, RoundType::CEIL, output_shape.data()); } output_tensor->Resize(output_shape); diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index fb93504e5d946d832edc9bbed37a829a10985d8f..8beba453cc20b421581d16270be9dedeb82b6ddb 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -342,7 +342,7 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } template -static void TestComplexConvNxNS12(const std::vector &shape) { +static void TestComplexConvNxNS12(const std::vector &shape, const int stride) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { @@ -405,20 +405,31 @@ static void TestComplexConvNxNS12(const std::vector &shape) { ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; - for (int kernel_size : {1, 3}) { - for (int stride : {1, 2}) { - func(kernel_size, kernel_size, stride, stride, VALID); - func(kernel_size, kernel_size, stride, stride, SAME); - } + for (int kernel_size : {1, 3, 7}) { + func(kernel_size, kernel_size, stride, stride, VALID); + func(kernel_size, kernel_size, stride, stride, SAME); } } TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) { - TestComplexConvNxNS12({32, 32, 32, 64}); + TestComplexConvNxNS12({32, 16, 16, 32}, + 1); + TestComplexConvNxNS12({32, 16, 16, 32}, + 2); } TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) { - TestComplexConvNxNS12({107, 113, 5, 7}); + TestComplexConvNxNS12({17, 113, 5, 7}, + 1); + TestComplexConvNxNS12({17, 113, 5, 7}, + 2); +} + +TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) { + TestComplexConvNxNS12({31, 113, 13, 17}, + 3); + TestComplexConvNxNS12({32, 32, 13, 17}, + 4); } template @@ -650,3 +661,81 @@ TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) { 4); } +template +static void 
TestArbitraryPadConvNxN(const std::vector &shape, const std::vector &paddings) { + testing::internal::LogToStderr(); + auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w) { + srand(time(NULL)); + + // generate random input + index_t batch = 1; + index_t height = shape[0]; + index_t width = shape[1]; + index_t input_channels = shape[2]; + index_t output_channels = shape[3]; + // Construct graph + OpsTestNet net; + OpDefBuilder("Conv2D", "Conv2dTest") + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntsArg("padding_values", paddings) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + + // Add input data + net.AddRandomInput("Input", {batch, height, width, input_channels}); + net.AddRandomInput( + "Filter", {kernel_h, kernel_w, output_channels, input_channels}); + net.AddRandomInput("Bias", {output_channels}); + + // run on cpu + net.RunOp(); + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + // run on gpu + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::CONV2D_FILTER); + BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + + OpDefBuilder("Conv2D", "Conv2dTest") + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntsArg("padding_values", paddings) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + // Run on device + net.RunOp(D); + + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); + }; + + for (int kernel_size : {3, 5}) { + for (int stride : {2, 3}) { + func(kernel_size, kernel_size, stride, stride); + } + } +} + +TEST_F(Conv2dOpTest, 
OPENCLAlignedPad1) { + TestArbitraryPadConvNxN({32, 32, 32, 64}, + {1, 1}); +} + +TEST_F(Conv2dOpTest, OPENCLAlignedPad2) { + TestArbitraryPadConvNxN({128, 128, 16, 16}, + {2, 2}); +} + +TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) { + TestArbitraryPadConvNxN({107, 113, 5, 7}, + {4, 4}); +} diff --git a/mace/python/tools/caffe_ops_stats.py b/mace/python/tools/caffe_ops_stats.py index 7c3bb7c45e44bb5973f910127a89b7b1963143f7..4eba5b664de816722d370c61757117ef0ffd25fe 100644 --- a/mace/python/tools/caffe_ops_stats.py +++ b/mace/python/tools/caffe_ops_stats.py @@ -5,10 +5,14 @@ import functools import argparse import sys import six +import os.path FLAGS = None def main(unused_args): + if not os.path.isfile(FLAGS.input): + print 'input model file not exist' + return -1 net = caffe_pb2.NetParameter() with open(FLAGS.input) as f: google.protobuf.text_format.Merge(str(f.read()), net)