Commit a2dbc2a4 authored by 刘托

Merge branch 'support_broadcast_on_channel' into 'master'

support eltwise broadcast on channel dimension

See merge request !831
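For context, the broadcast added here is the NHWC case where the second input has a single channel (shape [N, H, W, 1]) and that one value is applied across all C channels of the first input. A minimal reference loop for the SUM case (an illustrative sketch only, not MACE code; all names here are made up):

#include <cstddef>
#include <vector>

// out[n][h][w][c] = in0[n][h][w][c] + in1[n][h][w][0], NHWC layout.
std::vector<float> ChannelBroadcastSum(const std::vector<float> &in0,
                                       const std::vector<float> &in1,
                                       std::size_t N, std::size_t H,
                                       std::size_t W, std::size_t C) {
  std::vector<float> out(N * H * W * C);
  for (std::size_t nhw = 0; nhw < N * H * W; ++nhw) {
    for (std::size_t c = 0; c < C; ++c) {
      // in1 holds one value per (n, h, w) position.
      out[nhw * C + c] = in0[nhw * C + c] + in1[nhw];
    }
  }
  return out;
}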
@@ -35,6 +35,9 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
   DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx));
 #elif INPUT_TYPE == 3
   DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
+#elif INPUT_TYPE == 4
+  DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb));
+  DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x);
 #else
   DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb));
 #endif
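The new INPUT_TYPE == 4 branch reads the broadcast input at the same (pos, hb) image coordinate as input0 and splats its first component across all four packed channel lanes. Roughly, in host C++ terms (float4 below is just an illustrative stand-in for the OpenCL vector type):

struct float4 { float x, y, z, w; };

// Mirrors in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x): the single channel
// value is replicated so the per-lane eltwise math needs no special casing.
inline float4 SplatChannel(const float4 &tmp) {
  return float4{tmp.x, tmp.x, tmp.x, tmp.x};
}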
@@ -80,7 +83,7 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
 #endif
 #endif
-#if INPUT_TYPE == 1
+#if INPUT_TYPE == 1 || INPUT_TYPE == 4
 #if ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
     ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9
   const int remain_channel = channel - 4 * chan_idx;
......
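The remain-channel guard is extended to INPUT_TYPE == 4 because channels are packed four per image pixel; when channel is not a multiple of 4 the last vector carries padding lanes that must not influence the listed eltwise types. A host-side sketch of that idea (assumption: the elided kernel code neutralizes the padding lanes, e.g. by zeroing them):

#include <algorithm>

// Valid lanes in the packed 4-channel vector starting at channel 4 * chan_idx.
inline int RemainChannel(int channel, int chan_idx) {
  return std::min(4, channel - 4 * chan_idx);
}

// Illustrative only: clear the padding lanes so they cannot affect the result.
inline void MaskPaddingLanes(float (&vec)[4], int remain) {
  for (int i = remain; i < 4; ++i) vec[i] = 0.0f;
}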
@@ -77,12 +77,14 @@ MaceStatus EltwiseKernel<T>::Compute(
       }
       if (input1->dim_size() == 1) {
         MACE_CHECK(input0->dim(3) == input1->dim(0))
-            << "Element-Wise op only support channel dimension broadcast";
+            << "Element-Wise op only supports broadcast on the channel dimension or on non-channel dimensions";  // NOLINT(whitespace/line_length)
       } else {
-        MACE_CHECK((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1) &&
-                   input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 &&
-                   input1->dim(2) == 1)
-            << "Element-Wise op only support channel dimension broadcast";
+        MACE_CHECK(((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1)
+            && input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 &&
+            input1->dim(2) == 1) || (input0->dim(0) == input1->dim(0) &&
+            input0->dim(1) == input1->dim(1) && input0->dim(2) == input1->dim(2)
+            && input1->dim(3) == 1))
+            << "Element-Wise op only supports broadcast on the channel dimension or on non-channel dimensions";  // NOLINT(whitespace/line_length)
       }
     }
   }
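In other words, the relaxed check now accepts either the previously supported [1-or-N, 1, 1, C] second input or a [N, H, W, 1] second input (channel-only broadcast). A standalone predicate with the same logic, for illustration only:

#include <cstdint>
#include <vector>

// Mirrors the relaxed MACE_CHECK; d0 and d1 are NHWC dims with d1.size() == 4.
bool BroadcastShapeSupported(const std::vector<int64_t> &d0,
                             const std::vector<int64_t> &d1) {
  const bool non_channel_broadcast =
      (d0[0] == d1[0] || d1[0] == 1) &&
      d0[3] == d1[3] && d1[1] == 1 && d1[2] == 1;
  const bool channel_only_broadcast =
      d0[0] == d1[0] && d0[1] == d1[1] &&
      d0[2] == d1[2] && d1[3] == 1;
  return non_channel_broadcast || channel_only_broadcast;
}

// BroadcastShapeSupported({1, 32, 32, 16}, {1, 1, 1, 16})  -> true
// BroadcastShapeSupported({1, 32, 32, 16}, {1, 32, 32, 1}) -> true
// BroadcastShapeSupported({1, 32, 32, 16}, {1, 32, 1, 16}) -> false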
@@ -129,10 +131,15 @@ MaceStatus EltwiseKernel<T>::Compute(
     if (input1 == nullptr) {
       built_options.emplace("-DINPUT_TYPE=1");
     } else if (input0->size() != input1->size()) {
-      if (input1->dim(0) == 1 || input1->dim_size() == 1)
+      if (input0->dim(0) == input1->dim(0) && input0->dim(1) == input1->dim(1)
+          && input0->dim(2) == input1->dim(2) && input1->dim(3) == 1) {
+        // only broadcast on channel
+        built_options.emplace("-DINPUT_TYPE=4");
+      } else if (input1->dim(0) == 1 || input1->dim_size() == 1) {
         built_options.emplace("-DINPUT_TYPE=3");
-      else
+      } else {
         built_options.emplace("-DINPUT_TYPE=2");
+      }
       if (swapped) built_options.emplace("-DSWAPPED");
     }
     if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
......
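On the GPU path these shapes are then mapped to the INPUT_TYPE macro consumed by the kernel. A condensed sketch of the selection above (hypothetical helper, not MACE code):

#include <cstdint>
#include <vector>

// Returns the -DINPUT_TYPE value for a second input of a different size.
int SelectInputType(const std::vector<int64_t> &d0,
                    const std::vector<int64_t> &d1) {
  if (d1.size() == 4 && d0[0] == d1[0] && d0[1] == d1[1] &&
      d0[2] == d1[2] && d1[3] == 1) {
    return 4;  // broadcast on channel only
  } else if (d1[0] == 1 || d1.size() == 1) {
    return 3;  // broadcast over batch/height/width
  }
  return 2;  // general unequal-size case
}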
@@ -202,6 +202,27 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type,
     // Run
     net.RunOp(D);
+  } else if (D == DeviceType::GPU) {
+    BufferToImage<D, T>(&net, "Input0", "InputImage0",
+                        kernels::BufferType::IN_OUT_CHANNEL);
+    BufferToImage<D, T>(&net, "Input1", "InputImage1",
+                        kernels::BufferType::IN_OUT_CHANNEL);
+    auto op_builder =
+        OpDefBuilder("Eltwise", "EltwiseTest")
+            .AddIntArg("T", DataTypeToEnum<T>::v())
+            .Input("InputImage0")
+            .Input("InputImage1")
+            .AddIntArg("type", static_cast<int>(type))
+            .AddFloatsArg("coeff", coeff)
+            .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT})
+            .Output("OutputImage");
+    op_builder.Finalize(net.NewOperatorDef());
+    // Run
+    net.RunOp(D);
+    ImageToBuffer<D, float>(&net, "OutputImage", "Output",
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     MACE_NOT_IMPLEMENTED;
   }
@@ -747,7 +768,7 @@ TEST_F(EltwiseOpTest, RandomTensorTensorHalf) {
       {3, 31, 37, 17});
 }
 
-TEST_F(EltwiseOpTest, TensorGeneralBroadcast) {
+TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
   TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
       kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
       {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8});
@@ -774,6 +795,30 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcast) {
       {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 0, 0, 0, 0, 0});
 }
 
+TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) {
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
+      {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8});
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
+      {1, 2}, {1, 1, 2, 3}, {0, 1, 2, 2, 3, 4});
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
+      {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 8, 10, 12});
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
+      {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3});
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
+      {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2});
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
+      {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
+  TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+      kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
+      {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 4, 4, 9, 16});
+}
+
 TEST_F(EltwiseOpTest, QuantizedSum) {
   QuantizedSum({1, 32, 32, 16});
   QuantizedSum({1, 31, 31, 17});
......
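As a sanity check on the first GPU case: input0 is {1, 2, 3, 4, 5, 6} with shape {1, 1, 2, 3} and input1 is {1, 2} with shape {1, 1, 2, 1}, so 1 is added to the channels of the first width position and 2 to the second, giving {2, 3, 4, 6, 7, 8}. A minimal host-side verification of that expectation (illustrative, independent of the test harness):

#include <cassert>
#include <vector>

int main() {
  const std::vector<float> in0{1, 2, 3, 4, 5, 6};    // shape {1, 1, 2, 3}
  const std::vector<float> in1{1, 2};                // shape {1, 1, 2, 1}
  const std::vector<float> expected{2, 3, 4, 6, 7, 8};
  for (int w = 0; w < 2; ++w) {
    for (int c = 0; c < 3; ++c) {
      // Channel-broadcast SUM: each channel gets the single in1 value at (w).
      assert(in0[w * 3 + c] + in1[w] == expected[w * 3 + c]);
    }
  }
  return 0;
}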