未验证 提交 df10b17f 编写于 作者: Liu Qi 提交者: GitHub

Merge pull request #342 from TCLResearchEurope/fix_eltwise_broadcast_channel

BUG: Fix broadcasting on channel dimension in EltwiseOp for GPU
@@ -803,6 +803,28 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) {
 TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
 ops::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
 {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 4, 4, 9, 16});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::SUM, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 5}, {1, 2, 3, 4, 5, 7, 8, 9, 10, 11});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::SUB, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 5}, {-1, 0, 1, 2, 3, 3, 4, 5, 6, 7});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::PROD, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 10, 12, 14, 16, 18});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::DIV, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {1, 2}, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 2.5, 3, 3.5, 4, 4.5});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::MIN, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {3, 4}, {1, 1, 2, 5}, {0, 1, 2, 3, 3, 4, 4, 4, 4, 4});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::MAX, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {3, 4}, {1, 1, 2, 5}, {3, 3, 3, 3, 4, 5, 6, 7, 8, 9});
+TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
+ops::EltwiseType::SQR_DIFF, {1, 1, 2, 5}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+{1, 1, 2, 1}, {2, 3}, {1, 1, 2, 5}, {4, 1, 0, 1, 4, 4, 9, 16, 25, 36});
 }
 TEST_F(EltwiseOpTest, QuantizedSum) {
......
@@ -36,7 +36,7 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
 #elif INPUT_TYPE == 3
 DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
 #elif INPUT_TYPE == 4
-DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb));
+DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(width_idx, hb));
 DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x);
 #else
 DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册