提交 a2dbc2a4 编写于 作者: 刘托

Merge branch 'support_broadcast_on_channel' into 'master'

support eltwise broadcast on channel dimension

See merge request !831
...@@ -35,6 +35,9 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS ...@@ -35,6 +35,9 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx)); DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx));
#elif INPUT_TYPE == 3 #elif INPUT_TYPE == 3
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0)); DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif INPUT_TYPE == 4
DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb));
DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x);
#else #else
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb)); DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb));
#endif #endif
...@@ -80,7 +83,7 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS ...@@ -80,7 +83,7 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#endif #endif
#endif #endif
#if INPUT_TYPE == 1 #if INPUT_TYPE == 1 || INPUT_TYPE == 4
#if ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \ #if ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9 ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9
const int remain_channel = channel - 4 * chan_idx; const int remain_channel = channel - 4 * chan_idx;
......
...@@ -77,12 +77,14 @@ MaceStatus EltwiseKernel<T>::Compute( ...@@ -77,12 +77,14 @@ MaceStatus EltwiseKernel<T>::Compute(
} }
if (input1->dim_size() == 1) { if (input1->dim_size() == 1) {
MACE_CHECK(input0->dim(3) == input1->dim(0)) MACE_CHECK(input0->dim(3) == input1->dim(0))
<< "Element-Wise op only support channel dimension broadcast"; << "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length)
} else { } else {
MACE_CHECK((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1) && MACE_CHECK(((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1)
input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 && && input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 &&
input1->dim(2) == 1) input1->dim(2) == 1) || (input0->dim(0) == input1->dim(0) &&
<< "Element-Wise op only support channel dimension broadcast"; input0->dim(1) == input1->dim(1) && input0->dim(2) == input1->dim(2)
&& input1->dim(3) == 1))
<< "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length)
} }
} }
} }
...@@ -129,10 +131,15 @@ MaceStatus EltwiseKernel<T>::Compute( ...@@ -129,10 +131,15 @@ MaceStatus EltwiseKernel<T>::Compute(
if (input1 == nullptr) { if (input1 == nullptr) {
built_options.emplace("-DINPUT_TYPE=1"); built_options.emplace("-DINPUT_TYPE=1");
} else if (input0->size() != input1->size()) { } else if (input0->size() != input1->size()) {
if (input1->dim(0) == 1 || input1->dim_size() == 1) if (input0->dim(0) == input1->dim(0) && input0->dim(1) == input1->dim(1)
&& input0->dim(2) == input1->dim(2) && input1->dim(3) == 1) {
// only broadcast on channel
built_options.emplace("-DINPUT_TYPE=4");
} else if (input1->dim(0) == 1 || input1->dim_size() == 1) {
built_options.emplace("-DINPUT_TYPE=3"); built_options.emplace("-DINPUT_TYPE=3");
else } else {
built_options.emplace("-DINPUT_TYPE=2"); built_options.emplace("-DINPUT_TYPE=2");
}
if (swapped) built_options.emplace("-DSWAPPED"); if (swapped) built_options.emplace("-DSWAPPED");
} }
if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
......
...@@ -202,6 +202,27 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, ...@@ -202,6 +202,27 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type,
// Run // Run
net.RunOp(D); net.RunOp(D);
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT_CHANNEL);
auto op_builder =
OpDefBuilder("Eltwise", "EltwiseTest")
.AddIntArg("T", DataTypeToEnum<T>::v())
.Input("InputImage0")
.Input("InputImage1")
.AddIntArg("type", static_cast<int>(type))
.AddFloatsArg("coeff", coeff)
.OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT})
.Output("OutputImage");
op_builder.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -747,7 +768,7 @@ TEST_F(EltwiseOpTest, RandomTensorTensorHalf) { ...@@ -747,7 +768,7 @@ TEST_F(EltwiseOpTest, RandomTensorTensorHalf) {
{3, 31, 37, 17}); {3, 31, 37, 17});
} }
TEST_F(EltwiseOpTest, TensorGeneralBroadcast) { TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8}); {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8});
...@@ -774,6 +795,30 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcast) { ...@@ -774,6 +795,30 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcast) {
{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 0, 0, 0, 0, 0}); {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 0, 0, 0, 0, 0});
} }
TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) {
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {0, 1, 2, 2, 3, 4});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 8, 10, 12});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 4, 4, 9, 16});
}
TEST_F(EltwiseOpTest, QuantizedSum) { TEST_F(EltwiseOpTest, QuantizedSum) {
QuantizedSum({1, 32, 32, 16}); QuantizedSum({1, 32, 32, 16});
QuantizedSum({1, 31, 31, 17}); QuantizedSum({1, 31, 31, 17});
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册