Commit 25d2ad2d authored by liuqi

Fix eltwise sub and div swapped bug.

Parent f078a265
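In the eltwise functors, when the two inputs differ in size the smaller (vector-shaped) operand is swapped behind the larger tensor so that broadcasting always sees the tensor first. That is harmless for commutative ops, but SUB and DIV then evaluate their operands in the wrong order; this commit threads a swapped flag from the functors into the CPU and OpenCL kernels so they can reverse the expression. A minimal standalone sketch of the intended semantics (hypothetical helper names, not MACE code):

#include <cassert>

// Hypothetical helpers: when the operands were reordered so the tensor comes
// first, SUB and DIV must evaluate the reversed expression to preserve the
// caller's original meaning.
inline float EltwiseSub(float tensor_val, float vector_val, bool swapped) {
  return swapped ? vector_val - tensor_val : tensor_val - vector_val;
}

inline float EltwiseDiv(float tensor_val, float vector_val, bool swapped) {
  return swapped ? vector_val / tensor_val : tensor_val / vector_val;
}

int main() {
  // vector - tensor: 1 - 6 == -5, as in the new SUB test vectors below.
  assert(EltwiseSub(6.0f, 1.0f, /*swapped=*/true) == -5.0f);
  // tensor - vector (no swap): 6 - 1 == 5, the pre-existing behavior.
  assert(EltwiseSub(6.0f, 1.0f, /*swapped=*/false) == 5.0f);
  // vector / tensor: 4 / 2 == 2, as in the new DIV test vectors below.
  assert(EltwiseDiv(2.0f, 4.0f, /*swapped=*/true) == 2.0f);
  return 0;
}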
@@ -114,6 +114,7 @@ inline void TensorVector(const EltwiseType type,
                         const index_t batch,
                         const index_t channel,
                         const index_t hw,
+                        const bool swapped,
                         float *output) {
  switch (type) {
    case SUM:
@@ -129,13 +130,26 @@ inline void TensorVector(const EltwiseType type,
      }
      break;
    case SUB:
+      if (swapped) {
#pragma omp parallel for collapse(3)
-      for (index_t b = 0; b < batch; ++b) {
-        for (index_t c = 0; c < channel; ++c) {
-          for (index_t i = 0; i < hw; ++i) {
-            const index_t idx0 = (b * channel + c) * hw + i;
-            const index_t idx1 = b * channel + c;
-            output[idx0] = input0[idx0] - input1[idx1];
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input1[idx1] - input0[idx0];
+            }
+          }
+        }
+      } else {
+#pragma omp parallel for collapse(3)
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input0[idx0] - input1[idx1];
            }
          }
        }
+      }
@@ -153,13 +167,26 @@ inline void TensorVector(const EltwiseType type,
      }
      break;
    case DIV:
+      if (swapped) {
#pragma omp parallel for collapse(3)
-      for (index_t b = 0; b < batch; ++b) {
-        for (index_t c = 0; c < channel; ++c) {
-          for (index_t i = 0; i < hw; ++i) {
-            const index_t idx0 = (b * channel + c) * hw + i;
-            const index_t idx1 = b * channel + c;
-            output[idx0] = input0[idx0] / input1[idx1];
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input1[idx1] / input0[idx0];
+            }
+          }
+        }
+      } else {
+#pragma omp parallel for collapse(3)
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input0[idx0] / input1[idx1];
            }
          }
        }
+      }
@@ -283,12 +310,14 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
                  const Tensor *input1,
                  Tensor *output,
                  StatsFuture *future) {
+    bool swapped = false;
    if (input1 != nullptr) {
      MACE_CHECK(input0->dim_size() == input1->dim_size())
        << "Inputs of Eltwise op must be same shape";
      if (input0->size() != input1->size()) {
        if (input0->size() < input1->size()) {
          std::swap(input0, input1);
+          swapped = true;
        }
        MACE_CHECK(input0->dim(0) == input1->dim(0) &&
                   input0->dim(1) == input1->dim(1) &&
@@ -316,7 +345,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
      const index_t channel = input0->dim(1);
      const index_t hw = input0->dim(2) * input0->dim(3);
      TensorVector(type_, input0_ptr, input1_ptr,
-                  batch, channel, hw, output_ptr);
+                  batch, channel, hw, swapped, output_ptr);
    } else {
      if (!coeff_.empty() && type_ == SUM) {
#pragma omp parallel for
......
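Both functor changes above follow the same pattern: normalize the operand order so the tensor input comes first, and remember whether the original order was reversed. A minimal sketch of that pattern with plain pointers and sizes standing in for MACE Tensors (hypothetical names, not the MACE API):

#include <cstddef>
#include <utility>

// Hypothetical stand-in for the functor logic above.
struct Operands {
  const float *input0;  // larger (tensor) operand after normalization
  const float *input1;  // smaller (vector) operand after normalization
  bool swapped;         // true if the caller's order was reversed
};

inline Operands NormalizeOperands(const float *a, std::size_t a_size,
                                  const float *b, std::size_t b_size) {
  Operands ops{a, b, false};
  if (a_size < b_size) {  // mirrors the size check in the functors
    std::swap(ops.input0, ops.input1);
    ops.swapped = true;
  }
  return ops;
}

On the CPU path the flag is then forwarded to TensorVector; on the GPU path it becomes the -DSWAPPED build option consumed by the kernel below.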
@@ -45,11 +45,19 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS
  out = in0 + in1;
#endif
#elif ELTWISE_TYPE == 1
-  out = in0 - in1;
+#ifdef SWAPPED
+  out = in1 - in0;
+#else
+  out = in0 - in1;
+#endif
#elif ELTWISE_TYPE == 2
  out = in0 * in1;
#elif ELTWISE_TYPE == 3
-  out = in0 / in1;
+#ifdef SWAPPED
+  out = in1 / in0;
+#else
+  out = in0 / in1;
+#endif
#elif ELTWISE_TYPE == 4
  out = fmin(in0, in1);
#elif ELTWISE_TYPE == 5
......
@@ -25,12 +25,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
    const Tensor *input1,
    Tensor *output,
    StatsFuture *future) {
+  bool swapped = false;
  if (input1 != nullptr) {
    MACE_CHECK(input0->dim_size() == input1->dim_size())
      << "Inputs of Eltwise op must be same shape";
    if (input0->size() != input1->size()) {
      if (input0->size() < input1->size()) {
        std::swap(input0, input1);
+        swapped = true;
      }
      MACE_CHECK(input0->dim(0) == input1->dim(0) &&
                 input1->dim(1) == 1 &&
@@ -62,9 +64,10 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
  built_options.emplace(MakeString("-DELTWISE_TYPE=", type_));
  if (input1 == nullptr) {
-    built_options.emplace(MakeString("-DINPUT_TYPE=1"));
+    built_options.emplace("-DINPUT_TYPE=1");
  } else if (input0->size() != input1->size()) {
-    built_options.emplace(MakeString("-DINPUT_TYPE=2"));
+    built_options.emplace("-DINPUT_TYPE=2");
+    if (swapped) built_options.emplace("-DSWAPPED");
  }
  if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
......
@@ -238,6 +238,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
      {0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
+  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::SUB,
+      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
+      {1, 2, 1, 5},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+      {0, 0, 0, 0, 0,
+       -5, -5, -5, -5, -5});
  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::PROD,
      {1, 1, 1, 3}, {1, 2, 3},
      {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
@@ -247,6 +253,11 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 1, 1, 1, 5},
      {1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
+  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::DIV,
+      {1, 1, 1, 5}, {1, 1, 1, 2, 4},
+      {1, 2, 1, 5},
+      {1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
+      {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::MIN,
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
      {1, 2, 1, 5},
@@ -276,6 +287,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
      {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
      {0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
+  SimpleTensorEltwise<DeviceType::OPENCL, float>(
+      kernels::EltwiseType::SUB,
+      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
+      {1, 2, 1, 5},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+      {0, 0, 0, 0, 0, -5, -5, -5, -5, -5});
  SimpleTensorEltwise<DeviceType::OPENCL, float>(
      kernels::EltwiseType::PROD,
      {1, 1, 1, 3}, {1, 2, 3},
@@ -286,6 +303,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
      {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 1, 1, 1, 5},
      {1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
+  SimpleTensorEltwise<DeviceType::OPENCL, float>(
+      kernels::EltwiseType::DIV,
+      {1, 1, 1, 5}, {1, 1, 1, 2, 4},
+      {1, 2, 1, 5},
+      {1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
+      {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
  SimpleTensorEltwise<DeviceType::OPENCL, float>(
      kernels::EltwiseType::MIN,
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
@@ -530,6 +553,10 @@ TEST_F(EltwiseOpTest, RandomTensorVecFloat) {
                             {1, 32, 32, 16}, {1, 1, 1, 16});
  RandomTensorEltwise<float>(kernels::EltwiseType::SUB,
                             {5, 32, 32, 16}, {5, 1, 1, 16});
+  RandomTensorEltwise<float>(kernels::EltwiseType::SUB,
+                             {5, 1, 1, 16}, {5, 32, 32, 16});
  RandomTensorEltwise<float>(kernels::EltwiseType::PROD,
                             {1, 31, 37, 17}, {1, 1, 1, 17});
+  RandomTensorEltwise<float>(kernels::EltwiseType::PROD,
+                             {1, 1, 1, 17}, {1, 31, 37, 17});
  RandomTensorEltwise<float>(kernels::EltwiseType::DIV,
@@ -547,8 +574,12 @@ TEST_F(EltwiseOpTest, RandomTensorVecHalf) {
                            {1, 32, 32, 16}, {1, 1, 1, 16});
  RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
                            {3, 32, 32, 16}, {3, 1, 1, 16});
+  RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
+                            {3, 1, 1, 16}, {3, 32, 32, 16});
  RandomTensorEltwise<half>(kernels::EltwiseType::PROD,
                            {1, 1, 1, 17}, {1, 31, 37, 17});
  RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
                            {5, 31, 37, 17}, {5, 1, 1, 17});
+  RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
+                            {5, 1, 1, 17}, {5, 31, 37, 17});
  RandomTensorEltwise<half>(kernels::EltwiseType::MIN,
......
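As a quick sanity check on the new CPU SUB test above, a tiny standalone program (not part of the test suite) reproduces the expected output of subtracting the {1, 2, 1, 5} tensor from the broadcast {1, 1, 1, 5} vector:

#include <cstdio>

int main() {
  // First operand of the test: a 5-element vector; second: a 2 x 5 tensor.
  const float vec[5] = {1, 2, 3, 4, 5};
  const float tensor[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  // The vector is broadcast over both rows, then subtracted element-wise.
  for (int row = 0; row < 2; ++row) {
    for (int i = 0; i < 5; ++i) {
      std::printf("%g ", vec[i] - tensor[row * 5 + i]);
    }
  }
  std::printf("\n");  // prints: 0 0 0 0 0 -5 -5 -5 -5 -5
  return 0;
}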