diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
index a617e7c7f5f534d8bb765529c28524c1807b96ea..0a12bcc484a56e55132a1f240247bb784f047b76 100644
--- a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
+++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
@@ -432,7 +432,7 @@ bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor,
   }
   MACE_CHECK(output_bytes == output_tensor->raw_size(),
              "wrong output bytes inferred.");
-  return res == 0;
+  return true;
 }
 
 bool HexagonDSPWrapper::ExecuteGraphNew(
@@ -495,6 +495,7 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
                                    num_inputs * kNumMetaData,
                                    outputs.data(),
                                    num_outputs * kNumMetaData);
+  MACE_CHECK(res == 0, "execute error");
 
   // handle hexagon output
   for (size_t i = 0; i < num_outputs; ++i) {
@@ -504,12 +505,12 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
                                         outputs[index].depth};
     MACE_CHECK(output_shape.size() == output_info_[i].shape.size(),
                output_shape.size(), " vs ", output_info_[i].shape.size(),
-               "wrong output shape inferred");
+               " wrong output shape inferred");
     for (size_t j = 0; j < output_shape.size(); ++j) {
       MACE_CHECK(static_cast<index_t>(output_shape[j])
                      == output_info_[i].shape[j],
                  output_shape[j], " vs ", output_info_[i].shape[j],
-                 "wrong output shape inferred");
+                 " wrong output shape[", j, "] inferred");
     }
     auto output_tensor = output_tensors->at(output_info_[i].name);
     MACE_CHECK(static_cast<index_t>(outputs[index].data_valid_len)
@@ -518,7 +519,7 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
                " wrong output bytes inferred.");
   }
 
-  return res == 0;
+  return true;
 }
 
 }  // namespace mace
diff --git a/mace/ops/common/conv_pool_2d_util.cc b/mace/ops/common/conv_pool_2d_util.cc
index 4398888174675cb202cccefcf4cb374b97925aca..072efaa7db51f707bd08aa877f7cdd3b15028964 100644
--- a/mace/ops/common/conv_pool_2d_util.cc
+++ b/mace/ops/common/conv_pool_2d_util.cc
@@ -76,7 +76,8 @@ void CalcPaddingAndOutputSize(const index_t *input_shape,
       output_height = (input_height - k_extent_height) / strides[0] + 1;
       output_width = (input_width - k_extent_width) / strides[1] + 1;
       break;
-    case SAME:output_height = (input_height - 1) / strides[0] + 1;
+    case SAME:
+      output_height = (input_height - 1) / strides[0] + 1;
       output_width = (input_width - 1) / strides[1] + 1;
       break;
     case FULL:
diff --git a/mace/ops/opencl/cl/conv_2d.cl b/mace/ops/opencl/cl/conv_2d.cl
index 4a3d9e0dd894423b44cfa4335e05f95b0c4ed21f..2bf4572b85184b0056d7a9d113953d13b43475e0 100644
--- a/mace/ops/opencl/cl/conv_2d.cl
+++ b/mace/ops/opencl/cl/conv_2d.cl
@@ -17,7 +17,8 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
                       __private const int out_width,
                       __private const int filter_height,
                       __private const int filter_width,
-                      __private const int stride,
+                      __private const int stride_h,
+                      __private const int stride_w,
                       __private const int padding_top,
                       __private const int padding_left,
                       __private const int dilation_h,
@@ -47,12 +48,12 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
   DATA_TYPE4 out3 = 0;
 #endif
 
-  int in_width_stride = mul24(out_w_blks, stride);
-  int in_width0 = mad24(out_w_blk, stride, -padding_left);
+  int in_width_stride = mul24(out_w_blks, stride_w);
+  int in_width0 = mad24(out_w_blk, stride_w, -padding_left);
   int in_width1 = in_width0 + in_width_stride;
   int in_width2 = in_width1 + in_width_stride;
   int in_width3 = in_width2 + in_width_stride;
-  const int height_start = mad24((out_hb % out_height), stride, -padding_top);
+  const int height_start = mad24((out_hb % out_height), stride_h, -padding_top);
   int in_height_gap = select(
       0,
       (-height_start + dilation_h - 1) / dilation_h,
diff --git a/mace/ops/opencl/cl/conv_2d_1x1.cl b/mace/ops/opencl/cl/conv_2d_1x1.cl
index d0dc2e15f02877a25c8f0fa6a868a034d59513b4..1d146028133118015565259b7296455490cd6434 100644
--- a/mace/ops/opencl/cl/conv_2d_1x1.cl
+++ b/mace/ops/opencl/cl/conv_2d_1x1.cl
@@ -15,7 +15,8 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
                           __private const int in_ch_blks,
                           __private const int height,
                           __private const int width,
-                          __private const int stride) {
+                          __private const int stride_h,
+                          __private const int stride_w) {
   const int out_ch_blk = get_global_id(0);
   const int out_w_blk = get_global_id(1);
   const int out_hb = get_global_id(2);
@@ -41,14 +42,14 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
 #endif
 
   int4 w;
-  int in_width_stride = mul24(out_w_blks, stride);
-  w.x = mul24(out_w_blk, stride);
+  int in_width_stride = mul24(out_w_blks, stride_w);
+  w.x = mul24(out_w_blk, stride_w);
   w.y = w.x + in_width_stride;
   w.z = w.y + in_width_stride;
   w.w = w.z + in_width_stride;
   int batch = out_hb / height;
   int h_idx = out_hb - mul24(batch, height);
-  int out_hb_idx = mul24(h_idx, stride);
+  int out_hb_idx = mul24(h_idx, stride_h);
 
   w.x = select(w.x, INT_MIN, w.x >= in_width);
   w.y = select(w.y, INT_MIN, w.y >= in_width);
diff --git a/mace/ops/opencl/cl/conv_2d_3x3.cl b/mace/ops/opencl/cl/conv_2d_3x3.cl
index aeb8533290f1e3662ead4cf227e6315f967d3bc4..0c37cd8040cc8eec243d133fd9ce79bef344bf64 100644
--- a/mace/ops/opencl/cl/conv_2d_3x3.cl
+++ b/mace/ops/opencl/cl/conv_2d_3x3.cl
@@ -15,7 +15,8 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
                           __private const int in_ch_blks,
                           __private const int out_height,
                           __private const int out_width,
-                          __private const int stride,
+                          __private const int stride_h,
+                          __private const int stride_w,
                           __private const int padding_top,
                           __private const int padding_left,
                           __private const int dilation_h,
@@ -47,13 +48,13 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
   DATA_TYPE4 out4 = 0;
 #endif
 
-  int in_width_stride = mul24(out_w_blks, stride);
-  int in_width0 = mad24(out_w_blk, stride, -padding_left);
+  int in_width_stride = mul24(out_w_blks, stride_w);
+  int in_width0 = mad24(out_w_blk, stride_w, -padding_left);
   int in_width1 = in_width0 + in_width_stride;
   int in_width2 = in_width1 + in_width_stride;
   int in_width3 = in_width2 + in_width_stride;
   int in_width4 = in_width3 + in_width_stride;
-  const int height_start = mad24((out_hb % out_height), stride, -padding_top);
+  const int height_start = mad24((out_hb % out_height), stride_h, -padding_top);
   int in_height_gap = select(
       0,
       (-height_start + dilation_h - 1) / dilation_h,
diff --git a/mace/ops/opencl/image/conv_2d.cc b/mace/ops/opencl/image/conv_2d.cc
index 20c101a2410eb11c1a29fbe7f9aa4cfefda9511f..60a9b15537356435a32b8ff5404091e18f471b43 100644
--- a/mace/ops/opencl/image/conv_2d.cc
+++ b/mace/ops/opencl/image/conv_2d.cc
@@ -73,8 +73,7 @@ MaceStatus Conv2dKernel::Compute(
     Tensor *output) {
   index_t kernel_h = filter->dim(2);
   index_t kernel_w = filter->dim(3);
-  if (strides[0] != strides[1] ||
-      (dilations[0] > 1 && (strides[0] > 1 || kernel_h == 1))) {
+  if (dilations[0] > 1 && (strides[0] > 1 || kernel_h == 1)) {
     LOG(WARNING) << "OpenCL conv2d kernel with "
                  << "filter" << kernel_h << "x" << kernel_w << ","
                  << " stride " << strides[0] << "x" << strides[1]
@@ -131,6 +130,7 @@ MaceStatus Conv2dKernel::Compute(
         filter,
         bias,
         strides[0],
+        strides[1],
         paddings.data(),
         dilations,
         activation,
@@ -148,6 +148,7 @@ MaceStatus Conv2dKernel::Compute(
         filter,
         bias,
         strides[0],
+        strides[1],
         paddings.data(),
         dilations,
         activation,
@@ -165,6 +166,7 @@ MaceStatus Conv2dKernel::Compute(
         filter,
         bias,
         strides[0],
+        strides[1],
         paddings.data(),
         dilations,
         activation,
diff --git a/mace/ops/opencl/image/conv_2d.h b/mace/ops/opencl/image/conv_2d.h
index 84fae55dff77afef2056f3f8e1628413a73e0bc2..a1ee3301b373d43980d38b1fd38fb7876c5c47d2 100644
--- a/mace/ops/opencl/image/conv_2d.h
+++ b/mace/ops/opencl/image/conv_2d.h
@@ -33,7 +33,8 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
                              const Tensor *input,
                              const Tensor *filter,
                              const Tensor *bias,
-                             const int stride,
+                             const int stride_h,
+                             const int stride_w,
                              const int *padding,
                              const int *dilations,
                              const ActivationType activation,
@@ -48,7 +49,8 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
                              const Tensor *input,
                              const Tensor *filter,
                              const Tensor *bias,
-                             const int stride,
+                             const int stride_h,
+                             const int stride_w,
                              const int *padding,
                              const int *dilations,
                              const ActivationType activation,
@@ -63,7 +65,8 @@ extern MaceStatus Conv2d(OpContext *context,
                          const Tensor *input,
                          const Tensor *filter,
                          const Tensor *bias,
-                         const int stride,
+                         const int stride_h,
+                         const int stride_w,
                          const int *padding,
                          const int *dilations,
                          const ActivationType activation,
diff --git a/mace/ops/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc
index 460d01323dccd584b880f2cdc27b5d2e4c2735fe..718240152f6a74e0835c75edbae7782a1fb8c23f 100644
--- a/mace/ops/opencl/image/conv_2d_1x1.cc
+++ b/mace/ops/opencl/image/conv_2d_1x1.cc
@@ -71,7 +71,8 @@ MaceStatus Conv2dK1x1(OpContext *context,
                       const Tensor *input,
                       const Tensor *filter,
                       const Tensor *bias,
-                      const int stride,
+                      const int stride_h,
+                      const int stride_w,
                       const int *padding,
                       const int *dilations,
                       const ActivationType activation,
@@ -170,7 +171,8 @@ MaceStatus Conv2dK1x1(OpContext *context,
     kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
     kernel->setArg(idx++, static_cast<int>(height));
     kernel->setArg(idx++, static_cast<int>(width));
-    kernel->setArg(idx++, stride);
+    kernel->setArg(idx++, stride_h);
+    kernel->setArg(idx++, stride_w);
 
     *prev_input_shape = input->shape();
   }
diff --git a/mace/ops/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc
index a3bd170f64079a5b4533dd2a4fb104dbee752cfd..d8a8b9cfbba611e5d0a320e8708f7f08c0a2b844 100644
--- a/mace/ops/opencl/image/conv_2d_3x3.cc
+++ b/mace/ops/opencl/image/conv_2d_3x3.cc
@@ -64,7 +64,8 @@ MaceStatus Conv2dK3x3(OpContext *context,
                       const Tensor *input,
                       const Tensor *filter,
                       const Tensor *bias,
-                      const int stride,
+                      const int stride_h,
+                      const int stride_w,
                       const int *padding,
                       const int *dilations,
                       const ActivationType activation,
@@ -154,7 +155,8 @@ MaceStatus Conv2dK3x3(OpContext *context,
     kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
     kernel->setArg(idx++, static_cast<int>(height));
     kernel->setArg(idx++, static_cast<int>(width));
-    kernel->setArg(idx++, stride);
+    kernel->setArg(idx++, stride_h);
+    kernel->setArg(idx++, stride_w);
     kernel->setArg(idx++, padding[0] / 2);
     kernel->setArg(idx++, padding[1] / 2);
     kernel->setArg(idx++, dilations[0]);
diff --git a/mace/ops/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc
index e1979c03a715a8ec0a74bf26d35e3f34484d0c55..bf4baea72da0e0776eb3223e71650b475c994906 100644
--- a/mace/ops/opencl/image/conv_2d_general.cc
+++ b/mace/ops/opencl/image/conv_2d_general.cc
@@ -72,7 +72,8 @@ MaceStatus Conv2d(OpContext *context,
                   const Tensor *input,
                   const Tensor *filter,
                   const Tensor *bias,
-                  const int stride,
+                  const int stride_h,
+                  const int stride_w,
                   const int *padding,
                   const int *dilations,
                   const ActivationType activation,
@@ -164,7 +165,8 @@ MaceStatus Conv2d(OpContext *context,
     kernel->setArg(idx++, static_cast<int>(width));
     kernel->setArg(idx++, static_cast<int>(filter->dim(2)));
     kernel->setArg(idx++, static_cast<int>(filter->dim(3)));
-    kernel->setArg(idx++, static_cast<int>(stride));
+    kernel->setArg(idx++, static_cast<int>(stride_h));
+    kernel->setArg(idx++, static_cast<int>(stride_w));
     kernel->setArg(idx++, padding[0] / 2);
     kernel->setArg(idx++, padding[1] / 2);
     kernel->setArg(idx++, dilations[0]);
diff --git a/test/ccunit/mace/ops/conv_2d_test.cc b/test/ccunit/mace/ops/conv_2d_test.cc
index 3f97d0d2b70496962af3ac0d53501f6c8026df52..f9823265082b8e917551ba67923679110e9f5c8a 100644
--- a/test/ccunit/mace/ops/conv_2d_test.cc
+++ b/test/ccunit/mace/ops/conv_2d_test.cc
@@ -42,7 +42,8 @@ void TestNHWCSimple3x3VALID(int wino_blk_size = 0) {
   net.AddInputFromArray<D, float>(
       "Filter", {1, 2, 3, 3},
       {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true);
+       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
+      true);
   net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
 
   const std::vector<index_t> output_shape = {1, 1, 1, 1};
@@ -100,7 +101,8 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) {
   net.AddInputFromArray<D, float>(
       "Filter", {1, 2, 3, 3},
       {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true);
+       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
+      true);
   net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
 
   const std::vector<index_t> output_shape = {1, 3, 3, 1};
@@ -149,6 +151,65 @@
     ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3);
   }
 }
+
+template <DeviceType D, typename T>
+void TestNHWCSimple3x3NeqStride(int wino_blk_size = 0) {
+  OpsTestNet net;
+  // Add input data
+  net.AddInputFromArray<D, float>(
+      "Input", {1, 3, 3, 2},
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+  net.AddInputFromArray<D, float>(
+      "Filter", {1, 2, 3, 3},
+      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
+      true);
+  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
+  const std::vector<index_t> output_shape = {1, 3, 2, 1};
+
+  if (D == DeviceType::CPU) {
+    net.TransformDataFormat<DeviceType::CPU, float>(
+        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
+    OpDefBuilder("Conv2D", "Conv2dTest")
+        .Input("InputNCHW")
+        .Input("Filter")
+        .Input("Bias")
+        .Output("OutputNCHW")
+        .AddIntsArg("strides", {1, 2})
+        .AddIntArg("padding", Padding::SAME)
+        .AddIntsArg("dilations", {1, 1})
+        .Finalize(net.NewOperatorDef());
+    // Run
+    net.RunOp(D);
+    net.TransformDataFormat<DeviceType::CPU, float>(
+        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
+  } else if (D == DeviceType::GPU) {
+    OpDefBuilder("Conv2D", "Conv2dTest")
+        .Input("Input")
+        .Input("Filter")
+        .Input("Bias")
+        .Output("Output")
+        .OutputShape(output_shape)
+        .AddIntsArg("strides", {1, 2})
+        .AddIntArg("padding", Padding::SAME)
+        .AddIntsArg("dilations", {1, 1})
+        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
+        .AddIntArg("wino_block_size", wino_blk_size)
+        .Finalize(net.NewOperatorDef());
+
+    net.RunOp(D);
+  } else {
+    MACE_NOT_IMPLEMENTED;
+  }
+
+  auto expected = net.CreateTensor<float>(
+      output_shape, {8.1f, 8.1f, 12.1f, 12.1f, 8.1f, 8.1f});
+  if (DataTypeToEnum<T>::value == DataType::DT_FLOAT) {
+    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
+  } else {
+    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3);
+  }
+}
 }  // namespace
 
 TEST_F(Conv2dOpTest, CPUSimple) {
@@ -171,7 +232,12 @@ TEST_F(Conv2dOpTest, OPENCLSimpleWinograd) {
   TestNHWCSimple3x3VALID<DeviceType::GPU, float>(2);
   TestNHWCSimple3x3VALID<DeviceType::GPU, half>(2);
   // TODO(liutuo): the precision of the last value is not normal.
-//  TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4);
+  // TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4);
+}
+
+TEST_F(Conv2dOpTest, NotEqualStrideSimple) {
+  TestNHWCSimple3x3NeqStride<DeviceType::CPU, float>();
+  TestNHWCSimple3x3NeqStride<DeviceType::GPU, float>();
 }
 
 namespace {
@@ -186,7 +252,8 @@ void TestNHWCSimple3x3WithoutBias() {
   net.AddInputFromArray<D, float>(
       "Filter", {1, 2, 3, 3},
       {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true);
+       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
+      true);
 
   if (D == DeviceType::CPU) {
     net.TransformDataFormat<DeviceType::CPU, float>(
@@ -316,7 +383,8 @@ void TestFusedNHWCSimple3x3VALID(int wino_blk_size = 0) {
   net.AddInputFromArray<D, float>(
       "Filter", {1, 2, 3, 3},
       {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true);
+       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
+      true);
   net.AddInputFromArray<D, float>("Bias", {1}, {-0.1f}, true);
 
   const std::vector<index_t> output_shape = {1, 1, 1, 1};
@@ -372,7 +440,8 @@ void TestFusedNHWCSimple3x3WithoutBias(int wino_blk_size = 0) {
   net.AddInputFromArray<D, float>(
       "Filter", {1, 2, 3, 3},
       {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true);
+       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
+      true);
 
   const std::vector<index_t> output_shape = {1, 1, 1, 1};
   if (D == DeviceType::CPU) {
@@ -502,17 +571,87 @@ void TestConv1x1() {
   ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
 }
+
+template <DeviceType D>
+void TestConv1x1NeqStride() {
+  // Construct graph
+  OpsTestNet net;
+
+  // Add input data
+  net.AddInputFromArray<D, float>(
+      "Input", {1, 3, 10, 5},
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+  net.AddInputFromArray<D, float>(
+      "Filter", {2, 5, 1, 1},
+      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, true);
+  net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}, true);
+
+  if (D == DeviceType::CPU) {
+    net.TransformDataFormat<DeviceType::CPU, float>(
+        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
+    OpDefBuilder("Conv2D", "Conv2DTest")
+        .Input("InputNCHW")
+        .Input("Filter")
+        .Input("Bias")
+        .Output("OutputNCHW")
+        .AddIntsArg("strides", {1, 2})
+        .AddIntArg("padding", Padding::VALID)
+        .AddIntsArg("dilations", {1, 1})
+        .Finalize(net.NewOperatorDef());
+    // Run
+    net.RunOp(D);
+    net.TransformDataFormat<DeviceType::CPU, float>(
+        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
+  } else if (D == DeviceType::GPU) {
+    OpDefBuilder("Conv2D", "Conv2DTest")
+        .Input("Input")
+        .Input("Filter")
+        .Input("Bias")
+        .Output("Output")
+        .AddIntsArg("strides", {1, 2})
+        .AddIntArg("padding", Padding::VALID)
+        .AddIntsArg("dilations", {1, 1})
+        .Finalize(net.NewOperatorDef());
+    // Run
+    net.RunOp(D);
+  } else {
+    MACE_NOT_IMPLEMENTED;
+  }
+
+  // Check
+  auto expected = net.CreateTensor<float>(
+      {1, 3, 5, 2},
+      {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
+       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
+       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f});
+
+  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
+}
 }  // namespace
 
 TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
 
 TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::GPU>(); }
 
+TEST_F(Conv2dOpTest, CPUConv1x1NotEqualStride) {
+  TestConv1x1NeqStride<DeviceType::CPU>();
+}
+
+TEST_F(Conv2dOpTest, OPENCLConv1x1NotEqualStride) {
+  TestConv1x1NeqStride<DeviceType::GPU>();
+}
+
 namespace {
 template <DeviceType D, typename T>
-void TestComplexConvNxNS12(const std::vector<index_t> &shape,
-                           const int stride,
-                           const int wino_blk_size = 0) {
+void TestComplexConvNxN(const std::vector<index_t> &shape,
+                        const std::vector<int> strides,
+                        const int wino_blk_size = 0) {
   testing::internal::LogToStderr();
   auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
                   Padding type) {
@@ -581,35 +720,40 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
   };
 
   for (int kernel_size : {1, 3, 5, 7}) {
-    func(kernel_size, kernel_size, stride, stride, VALID);
-    func(kernel_size, kernel_size, stride, stride, SAME);
+    func(kernel_size, kernel_size, strides[0], strides[1], VALID);
+    func(kernel_size, kernel_size, strides[0], strides[1], SAME);
   }
 }
 }  // namespace
 
 TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
-  TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1);
-  TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 2);
+  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1});
+  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {2, 2});
 }
 
 TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNWinograd) {
-  TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1, 2);
-  TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1, 4);
+  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1}, 2);
+  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1}, 4);
 }
 
 TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) {
-  TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1);
-  TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 2);
+  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1});
+  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {2, 2});
 }
 
 TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWinograd) {
-  TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1, 4);
-  TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1, 2);
+  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1}, 4);
+  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1}, 2);
 }
 
 TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) {
-  TestComplexConvNxNS12<DeviceType::GPU, float>({31, 113, 13, 17}, 3);
-  TestComplexConvNxNS12<DeviceType::GPU, float>({32, 32, 13, 17}, 4);
+  TestComplexConvNxN<DeviceType::GPU, float>({31, 113, 13, 17}, {3, 3});
+  TestComplexConvNxN<DeviceType::GPU, float>({32, 32, 13, 17}, {4, 4});
+}
+
+TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWithNotEqualStride) {
+  TestComplexConvNxN<DeviceType::GPU, float>({31, 113, 13, 17}, {1, 2});
+  TestComplexConvNxN<DeviceType::GPU, float>({32, 32, 13, 17}, {3, 4});
 }
 
 namespace {
@@ -646,9 +790,7 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
   net.AddInputFromArray<DeviceType::GPU, float>(
       "Filter", {output_channels, input_channels, kernel_h, kernel_w},
       float_filter_data, true);
-  net.AddInputFromArray<DeviceType::GPU, float>("Bias",
-                                                {output_channels},
-                                                float_bias_data,
+  net.AddInputFromArray<DeviceType::GPU, float>("Bias", {output_channels}, float_bias_data,
                                                 true);
 
   net.TransformDataFormat<DeviceType::CPU, float>(
@@ -713,11 +855,11 @@ TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
 }
 
 TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3Winograd) {
-  TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64},
-                            {1, 1}, 2);
-// TODO(liutuo) : the precision error is large.
-// TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64},
-//                           {1, 1}, 4);
+  TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64}, {1, 1},
+                            2);
+  // TODO(liutuo) : the precision error is large.
+  // TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64},
+  //                           {1, 1}, 4);
 }
 
 TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv5x5S12) {
@@ -769,10 +911,10 @@ TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
 }
 
 TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3Winograd) {
   // TODO(liutuo) : the precision error is large.
-//  TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7},
-//                            {1, 1}, 4);
-  TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7},
-                            {1, 1}, 2);
+  // TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7},
+  //                           {1, 1}, 4);
+  TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7}, {1, 1},
+                            2);
 }
 
 TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) {
@@ -1045,10 +1187,10 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
 }
 
 TEST_F(Conv2dOpTest, OPENCLAlignedPad2Winograd) {
-  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16},
-                                                  {2, 2}, 2);
-  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16},
-                                                  {2, 2}, 4);
+  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2},
+                                                  2);
+  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2},
+                                                  4);
 }
 
 TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) {
@@ -1071,15 +1213,15 @@ void TestQuantSimple3x3() {
 
   // Add input data
   net.AddInputFromArray<DeviceType::CPU, uint8_t>(
-      "Filter", {1, 3, 3, 2},
-      {102, 150, 123, 135, 1, 216, 137, 47, 53, 75, 145, 130, 171, 62, 255,
-       122, 72, 211}, true, 0.0226, 127);
+      "Filter", {1, 3, 3, 2}, {102, 150, 123, 135, 1, 216, 137, 47, 53, 75, 145,
+                               130, 171, 62, 255, 122, 72, 211},
+      true, 0.0226, 127);
   net.AddInputFromArray<DeviceType::CPU, uint8_t>(
-      "Input", {1, 3, 3, 2},
-      {1, 75, 117, 161, 127, 119, 94, 151, 203, 151, 84, 61, 55, 142, 113, 139,
-       3, 255}, false, 0.0204, 93);
-  net.AddInputFromArray<DeviceType::CPU, int32_t>(
-      "Bias", {1}, {2}, true, 0.00046104, 0);
+      "Input", {1, 3, 3, 2}, {1, 75, 117, 161, 127, 119, 94, 151, 203, 151, 84,
+                              61, 55, 142, 113, 139, 3, 255},
+      false, 0.0204, 93);
+  net.AddInputFromArray<DeviceType::CPU, int32_t>("Bias", {1}, {2}, true,
+                                                  0.00046104, 0);
 
   OpDefBuilder("Conv2D", "Conv2dTest")
       .Input("Input")
@@ -1113,17 +1255,15 @@ void TestQuant(const index_t batch,
                enum Padding padding_type,
                const std::vector<int> &strides) {
   OpsTestNet net;
-  net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, in_height, in_width,
-                                                       in_channels});
-  net.AddRandomInput<DeviceType::CPU, float>("Filter", {out_channels, k_height, k_width,
-                                                        in_channels}, true);
+  net.AddRandomInput<DeviceType::CPU, float>("Input",
+                                             {batch, in_height, in_width, in_channels});
+  net.AddRandomInput<DeviceType::CPU, float>(
+      "Filter", {out_channels, k_height, k_width, in_channels}, true);
   net.AddRandomInput<DeviceType::CPU, float>("Bias", {out_channels}, true);
   net.TransformDataFormat<DeviceType::CPU, float>(
       "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
-  net.TransformFilterDataFormat<DeviceType::CPU, float>("Filter",
-                                                        DataFormat::OHWI,
-                                                        "FilterOIHW",
-                                                        DataFormat::OIHW);
+  net.TransformFilterDataFormat<DeviceType::CPU, float>(
+      "Filter", DataFormat::OHWI, "FilterOIHW", DataFormat::OIHW);
 
   OpDefBuilder("Conv2D", "Conv2dTest")
       .Input("InputNCHW")
@@ -1172,10 +1312,10 @@
   auto bias_data = bias->data<float>();
   float bias_scale = q_input->scale() * q_filter->scale();
   std::vector<int32_t> q_bias(bias->size());
-  QuantizeUtil<float, int32_t>
-      quantize_util(OpTestContext::Get()->thread_pool());
-  quantize_util.QuantizeWithScaleAndZeropoint(
-      bias_data, bias->size(), bias_scale, 0, q_bias.data());
+  QuantizeUtil<float, int32_t> quantize_util(
+      OpTestContext::Get()->thread_pool());
+  quantize_util.QuantizeWithScaleAndZeropoint(bias_data, bias->size(),
+                                              bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
       "QuantizedBias", {out_channels}, q_bias, true, bias_scale, 0);
diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py
index 11a15b2a5ad0612cc8508f53bdb6ff5fa077cfb4..0d6a89debeacbe7b7b6986145480d98148ebcd35 100644
--- a/tools/python/transform/base_converter.py
+++ b/tools/python/transform/base_converter.py
@@ -180,7 +180,6 @@ MaceTransposableDataFormatOps = [MaceOp.Activation,
                                  MaceOp.Eltwise,
                                  MaceOp.Pad,
                                  MaceOp.Reduce,
-                                 MaceOp.Reshape,
                                  MaceOp.Softmax,
                                  MaceOp.Split,
                                  MaceOp.Squeeze,
diff --git a/tools/python/transform/hexagon_converter.py b/tools/python/transform/hexagon_converter.py
index 900c0ba1f3669d6c2a1bc4899362f963ddd2dae4..d8fcc1f77bbb442cc5b2d7bca9d471a5271b7c9b 100644
--- a/tools/python/transform/hexagon_converter.py
+++ b/tools/python/transform/hexagon_converter.py
@@ -121,9 +121,9 @@ class HexagonConverter(base_converter.ConverterInterface):
         # convert op node
         self.convert_ops()
 
-        self.convert_input_output_node()
+        model_inputs = self.convert_input_output_node()
 
-        self.add_node_id()
+        self.add_node_id(model_inputs)
 
         return self._model
 
@@ -235,8 +235,11 @@ class HexagonConverter(base_converter.ConverterInterface):
         for input_node in self._option.input_nodes.values():
             op_name = normalize_name(
                 MaceKeyword.mace_input_node_name + '_' + input_node.name)
-            op = first_quantize_input_op \
-                if op_name == first_quantize_input_op.name else ops[op_name]
+            if op_name == first_quantize_input_op.name:
+                op = first_quantize_input_op
+                quantize_input_op.name = MaceKeyword.mace_input_node_name
+            else:
+                op = ops[op_name]
             mace_check(op.type == HexagonOp.QuantizeINPUT_f_to_8.name,
                        "input node type is: %s" % op.type)
             quantize_input_op.output.extend(op.output)
@@ -276,7 +279,9 @@ class HexagonConverter(base_converter.ConverterInterface):
             dequantize_output_op.type = HexagonOp.OUTPUT.name
             del dequantize_output_op.input[1:]
 
-    def add_node_id(self):
+        return quantize_input_op.output
+
+    def add_node_id(self, model_inputs):
         node_id_counter = 0
         node_id_map = {}
         for tensor in self._model.tensors:
@@ -305,7 +310,11 @@ class HexagonConverter(base_converter.ConverterInterface):
                 node_id = node_id_map[tensor_name]
                 node_input = op.node_input.add()
                 node_input.node_id = node_id
-                node_input.output_port = int(port)
+                if tensor_name in model_inputs:
+                    for i in range(len(model_inputs)):
+                        if model_inputs[i] == tensor_name:
+                            port += i * 3
+                node_input.output_port = port
 
     def convert_ops(self):
         print("Convert mace graph to hexagon.")
diff --git a/tools/python/transform/tensorflow_converter.py b/tools/python/transform/tensorflow_converter.py
index 48b475bbf9f1abd35f25b4f9fa5e44e001cec5e3..38672a97e01b7d41b253744ad03046705506ae25 100644
--- a/tools/python/transform/tensorflow_converter.py
+++ b/tools/python/transform/tensorflow_converter.py
@@ -1075,6 +1075,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
         if tf_op.type == TFOpType.FakeQuantWithMinMaxVars.name:
             self._skip_tensor.add(tf_op.inputs[1].name)
             self._skip_tensor.add(tf_op.inputs[2].name)
+            del op.input[1:]
 
     def convert_cumsum(self, tf_op):
         op = self.convert_general_op(tf_op)
diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py
index 5344e8d5f73217465196526054b676d193940548..4c56b233f0f9395588d471168cdf4b350cc603c7 100644
--- a/tools/python/transform/transformer.py
+++ b/tools/python/transform/transformer.py
@@ -1734,16 +1734,18 @@ class Transformer(base_converter.ConverterInterface):
         for op in net.op:
             if op.type == 'FakeQuantWithMinMaxVars' or \
                     op.type == 'FakeQuantWithMinMaxArgs':
-                producer_op = self._producer[op.input[0]]
-                minval = ConverterUtil.get_arg(op, 'min').f
-                maxval = ConverterUtil.get_arg(op, 'max').f
-                quantize_info = \
-                    self.add_quantize_info(producer_op, minval, maxval)
-                self._quantize_activation_info[op.input[0]] = quantize_info
-                # for add -> fakequant pattern
-                self._quantize_activation_info[op.output[0]] = quantize_info
-
-                print(op.input[0], op.output[0])
+                if op.input[0] not in self._consts:
+                    producer_op = self._producer[op.input[0]]
+                    minval = ConverterUtil.get_arg(op, 'min').f
+                    maxval = ConverterUtil.get_arg(op, 'max').f
+                    quantize_info = \
+                        self.add_quantize_info(producer_op, minval, maxval)
+                    self._quantize_activation_info[op.input[0]] = quantize_info
+                    # for add -> fakequant pattern
+                    self._quantize_activation_info[op.output[0]] = \
+                        quantize_info
+
+                    print(op.input[0], op.output[0])
                 op.type = MaceOp.Identity.name
 
         return False
@@ -1850,6 +1852,8 @@ class Transformer(base_converter.ConverterInterface):
             quantize_info.scale = scale
             quantize_info.zero_point = zero
             self._quantize_activation_info[new_input_name] = quantize_info
+            input_op = self._producer[input_node.name]
+            input_op.quantize_info.extend([quantize_info])
 
         print("Add default quantize info for ops like Pooling, Softmax")
         for op in self._model.op:
@@ -1904,8 +1908,8 @@ class Transformer(base_converter.ConverterInterface):
             elif (op.type == MaceOp.Eltwise.name
                   and not op.quantize_info
                   and len(op.input) == 2
-                  and len(op.input[0]) not in self._consts
-                  and len(op.input[1]) not in self._consts):
+                  and op.input[0] not in self._consts
+                  and op.input[1] not in self._consts):
                 producer_op0 = self._producer[op.input[0]]
                 producer_op1 = self._producer[op.input[1]]
                 if ConverterUtil.get_arg(
diff --git a/tools/python/utils/util.py b/tools/python/utils/util.py
index d2e25a92c42a8c02a896faae37c1a2229f1d3136..3c8bc80c6d1d1f53ae7bf0fce68fc481b0f454fa 100644
--- a/tools/python/utils/util.py
+++ b/tools/python/utils/util.py
@@ -65,8 +65,8 @@ class MaceLogger:
               + CMDColors.ENDC)
 
     @staticmethod
-    def error(message):
-        print(CMDColors.RED + 'ERROR: ' + get_frame_info() + str(message)
+    def error(message, level=2):
+        print(CMDColors.RED + 'ERROR: ' + get_frame_info(level) + str(message)
               + CMDColors.ENDC)
         exit(1)
 
@@ -76,7 +76,7 @@ def mace_check(condition, message):
         for line in traceback.format_stack():
             print(line.strip())
 
-        MaceLogger.error(message)
+        MaceLogger.error(message, level=3)
 
 
 ################################