From 34c223e7e63885c400fbe0c62e4466eeff20d7f9 Mon Sep 17 00:00:00 2001 From: yejianwu Date: Wed, 17 Oct 2018 10:52:51 +0800 Subject: [PATCH] support eltwise broadcast on channel dimension --- mace/kernels/opencl/cl/eltwise.cl | 5 ++- mace/kernels/opencl/image/eltwise.h | 21 ++++++++----- mace/ops/eltwise_test.cc | 47 ++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/kernels/opencl/cl/eltwise.cl index 9de68bc7..931d0eca 100644 --- a/mace/kernels/opencl/cl/eltwise.cl +++ b/mace/kernels/opencl/cl/eltwise.cl @@ -35,6 +35,9 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx)); #elif INPUT_TYPE == 3 DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0)); +#elif INPUT_TYPE == 4 + DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb)); + DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x); #else DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb)); #endif @@ -80,7 +83,7 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS #endif #endif -#if INPUT_TYPE == 1 +#if INPUT_TYPE == 1 || INPUT_TYPE == 4 #if ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \ ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9 const int remain_channel = channel - 4 * chan_idx; diff --git a/mace/kernels/opencl/image/eltwise.h b/mace/kernels/opencl/image/eltwise.h index 2a18cbef..c2bbc3a5 100644 --- a/mace/kernels/opencl/image/eltwise.h +++ b/mace/kernels/opencl/image/eltwise.h @@ -77,12 +77,14 @@ MaceStatus EltwiseKernel::Compute( } if (input1->dim_size() == 1) { MACE_CHECK(input0->dim(3) == input1->dim(0)) - << "Element-Wise op only support channel dimension broadcast"; + << "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length) } else { - MACE_CHECK((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1) && - input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 && - input1->dim(2) == 1) - << "Element-Wise op only support channel dimension broadcast"; + MACE_CHECK(((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1) + && input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 && + input1->dim(2) == 1) || (input0->dim(0) == input1->dim(0) && + input0->dim(1) == input1->dim(1) && input0->dim(2) == input1->dim(2) + && input1->dim(3) == 1)) + << "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length) } } } @@ -129,10 +131,15 @@ MaceStatus EltwiseKernel::Compute( if (input1 == nullptr) { built_options.emplace("-DINPUT_TYPE=1"); } else if (input0->size() != input1->size()) { - if (input1->dim(0) == 1 || input1->dim_size() == 1) + if (input0->dim(0) == input1->dim(0) && input0->dim(1) == input1->dim(1) + && input0->dim(2) == input1->dim(2) && input1->dim(3) == 1) { + // only broadcast on channel + built_options.emplace("-DINPUT_TYPE=4"); + } else if (input1->dim(0) == 1 || input1->dim_size() == 1) { built_options.emplace("-DINPUT_TYPE=3"); - else + } else { built_options.emplace("-DINPUT_TYPE=2"); + } if (swapped) built_options.emplace("-DSWAPPED"); } if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index 0fd1fc8d..d1506987 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc @@ -202,6 +202,27 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, // Run net.RunOp(D); + } else if (D == DeviceType::GPU) { + BufferToImage(&net, "Input0", "InputImage0", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(&net, "Input1", "InputImage1", + kernels::BufferType::IN_OUT_CHANNEL); + auto op_builder = + OpDefBuilder("Eltwise", "EltwiseTest") + .AddIntArg("T", DataTypeToEnum::v()) + .Input("InputImage0") + .Input("InputImage1") + .AddIntArg("type", static_cast(type)) + .AddFloatsArg("coeff", coeff) + .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .Output("OutputImage"); + op_builder.Finalize(net.NewOperatorDef()); + + // Run + net.RunOp(D); + + ImageToBuffer(&net, "OutputImage", "Output", + kernels::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -747,7 +768,7 @@ TEST_F(EltwiseOpTest, RandomTensorTensorHalf) { {3, 31, 37, 17}); } -TEST_F(EltwiseOpTest, TensorGeneralBroadcast) { +TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { TensorGeneralBroadcastEltwise( kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8}); @@ -774,6 +795,30 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcast) { {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 0, 0, 0, 0, 0}); } +TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) { + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8}); + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + {1, 2}, {1, 1, 2, 3}, {0, 1, 2, 2, 3, 4}); + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 8, 10, 12}); + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}); + TensorGeneralBroadcastEltwise( + kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 4, 4, 9, 16}); +} + TEST_F(EltwiseOpTest, QuantizedSum) { QuantizedSum({1, 32, 32, 16}); QuantizedSum({1, 31, 31, 17}); -- GitLab