Commit 25d2ad2d authored by liuqi

Fix eltwise sub and div swapped bug.

Parent f078a265
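When the two inputs differ in size, both the CPU and OpenCL eltwise paths canonicalize by std::swap-ing the input pointers so that input0 is always the larger tensor. That is harmless for commutative types (SUM, PROD, MIN, MAX) but silently reverses the operands of SUB and DIV. The fix threads a swapped flag through both paths so the non-commutative cases can evaluate in the caller's original order. A minimal standalone sketch of the idea (simplified 1-D broadcast, not MACE's actual API; BroadcastSub is a hypothetical name):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Minimal sketch: canonicalize so `a` is the larger input, but remember
    // the swap so a non-commutative op (here SUB) keeps its original order.
    std::vector<float> BroadcastSub(std::vector<float> a, std::vector<float> b) {
      bool swapped = false;
      if (a.size() < b.size()) {
        std::swap(a, b);  // canonicalize: a is now the larger input
        swapped = true;   // the fix: record that the operands were reversed
      }
      std::vector<float> out(a.size());
      for (std::size_t i = 0; i < a.size(); ++i) {
        const float x = a[i];
        const float y = b[i % b.size()];   // broadcast b over a
        out[i] = swapped ? y - x : x - y;  // undo the reversal for SUB
      }
      return out;
    }

    int main() {
      // {1, 2} - {10, 20, 30, 40} broadcasts to {-9, -18, -29, -38}.
      const std::vector<float> r = BroadcastSub({1, 2}, {10, 20, 30, 40});
      assert(r[0] == -9 && r[1] == -18 && r[2] == -29 && r[3] == -38);
      return 0;
    }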
@@ -114,6 +114,7 @@ inline void TensorVector(const EltwiseType type,
                          const index_t batch,
                          const index_t channel,
                          const index_t hw,
+                         const bool swapped,
                          float *output) {
   switch (type) {
     case SUM:
@@ -129,13 +130,26 @@ inline void TensorVector(const EltwiseType type,
       }
       break;
     case SUB:
+      if (swapped) {
 #pragma omp parallel for collapse(3)
       for (index_t b = 0; b < batch; ++b) {
         for (index_t c = 0; c < channel; ++c) {
           for (index_t i = 0; i < hw; ++i) {
             const index_t idx0 = (b * channel + c) * hw + i;
             const index_t idx1 = b * channel + c;
-            output[idx0] = input0[idx0] - input1[idx1];
+            output[idx0] = input1[idx1] - input0[idx0];
+          }
+        }
+      }
+      } else {
+#pragma omp parallel for collapse(3)
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input0[idx0] - input1[idx1];
+            }
           }
         }
       }
@@ -153,13 +167,26 @@ inline void TensorVector(const EltwiseType type,
       }
       break;
     case DIV:
+      if (swapped) {
 #pragma omp parallel for collapse(3)
       for (index_t b = 0; b < batch; ++b) {
         for (index_t c = 0; c < channel; ++c) {
           for (index_t i = 0; i < hw; ++i) {
             const index_t idx0 = (b * channel + c) * hw + i;
             const index_t idx1 = b * channel + c;
-            output[idx0] = input0[idx0] / input1[idx1];
+            output[idx0] = input1[idx1] / input0[idx0];
+          }
+        }
+      }
+      } else {
+#pragma omp parallel for collapse(3)
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input0[idx0] / input1[idx1];
+            }
           }
         }
       }
@@ -283,12 +310,14 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
                   const Tensor *input1,
                   Tensor *output,
                   StatsFuture *future) {
+    bool swapped = false;
     if (input1 != nullptr) {
       MACE_CHECK(input0->dim_size() == input1->dim_size())
         << "Inputs of Eltwise op must be same shape";
       if (input0->size() != input1->size()) {
         if (input0->size() < input1->size()) {
           std::swap(input0, input1);
+          swapped = true;
         }
         MACE_CHECK(input0->dim(0) == input1->dim(0) &&
                    input0->dim(1) == input1->dim(1) &&
@@ -316,7 +345,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
       const index_t channel = input0->dim(1);
       const index_t hw = input0->dim(2) * input0->dim(3);
       TensorVector(type_, input0_ptr, input1_ptr,
-                   batch, channel, hw, output_ptr);
+                   batch, channel, hw, swapped, output_ptr);
     } else {
       if (!coeff_.empty() && type_ == SUM) {
 #pragma omp parallel for
...
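For reference, a condensed, self-contained rendering of the fixed CPU tensor-vector path (SUB only; index_t replaced by a plain typedef, and the two loop nests folded into one ternary for brevity; the committed code duplicates the loop nest per branch so the #pragma omp parallel for collapse(3) region stays branch-free):

    #include <cstdint>
    #include <cstdio>

    typedef int64_t index_t;  // stand-in for MACE's index_t

    // input0: full NCHW tensor; input1: one value per (batch, channel),
    // broadcast over H*W; `swapped` restores the caller's operand order.
    void TensorVectorSub(const float *input0, const float *input1,
                         index_t batch, index_t channel, index_t hw,
                         bool swapped, float *output) {
      for (index_t b = 0; b < batch; ++b) {
        for (index_t c = 0; c < channel; ++c) {
          for (index_t i = 0; i < hw; ++i) {
            const index_t idx0 = (b * channel + c) * hw + i;  // tensor element
            const index_t idx1 = b * channel + c;             // broadcast vector
            output[idx0] = swapped ? input1[idx1] - input0[idx0]
                                   : input0[idx0] - input1[idx1];
          }
        }
      }
    }

    int main() {
      const float tensor[4] = {6, 7, 8, 9};  // shape {1, 1, 4} as (b, c, hw)
      const float vec[1] = {5};              // shape {1, 1}
      float out[4];
      // swapped == true: the op was originally vec - tensor.
      TensorVectorSub(tensor, vec, 1, 1, 4, true, out);
      std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // -1 -2 -3 -4
      return 0;
    }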
@@ -45,11 +45,19 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS
     out = in0 + in1;
 #endif
 #elif ELTWISE_TYPE == 1
-  out = in0 - in1;
+#ifdef SWAPPED
+  out = in1 - in0;
+#else
+  out = in0 - in1;
+#endif
 #elif ELTWISE_TYPE == 2
   out = in0 * in1;
 #elif ELTWISE_TYPE == 3
-  out = in0 / in1;
+#ifdef SWAPPED
+  out = in1 / in0;
+#else
+  out = in0 / in1;
+#endif
 #elif ELTWISE_TYPE == 4
   out = fmin(in0, in1);
 #elif ELTWISE_TYPE == 5
...
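In the OpenCL path the arithmetic expression is chosen at kernel-compile time by preprocessor defines, so the fix adds a SWAPPED define rather than a runtime flag. A small host-side sketch of the mapping the #elif chain above implements (type numbering read off this hunk; type 0's COEFF_SUM variant and the types past 4 are elided in the diff):

    #include <iostream>
    #include <string>

    // Which expression the eltwise kernel is compiled with, as selected by
    // -DELTWISE_TYPE=<n> and the new -DSWAPPED define.
    std::string EltwiseExpr(int eltwise_type, bool swapped) {
      switch (eltwise_type) {
        case 0: return "out = in0 + in1;";  // SUM (COEFF_SUM variant elided)
        case 1: return swapped ? "out = in1 - in0;" : "out = in0 - in1;";  // SUB
        case 2: return "out = in0 * in1;";  // PROD
        case 3: return swapped ? "out = in1 / in0;" : "out = in0 / in1;";  // DIV
        case 4: return "out = fmin(in0, in1);";  // MIN
        default: return "/* elided in this hunk */";
      }
    }

    int main() {
      std::cout << EltwiseExpr(1, true) << "\n";   // out = in1 - in0;
      std::cout << EltwiseExpr(3, false) << "\n";  // out = in0 / in1;
      return 0;
    }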
@@ -25,12 +25,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
                                                        const Tensor *input1,
                                                        Tensor *output,
                                                        StatsFuture *future) {
+  bool swapped = false;
   if (input1 != nullptr) {
     MACE_CHECK(input0->dim_size() == input1->dim_size())
       << "Inputs of Eltwise op must be same shape";
     if (input0->size() != input1->size()) {
       if (input0->size() < input1->size()) {
         std::swap(input0, input1);
+        swapped = true;
       }
       MACE_CHECK(input0->dim(0) == input1->dim(0) &&
                  input1->dim(1) == 1 &&
@@ -62,9 +64,10 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
     built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
     built_options.emplace(MakeString("-DELTWISE_TYPE=", type_));
     if (input1 == nullptr) {
-      built_options.emplace(MakeString("-DINPUT_TYPE=1"));
+      built_options.emplace("-DINPUT_TYPE=1");
     } else if (input0->size() != input1->size()) {
-      built_options.emplace(MakeString("-DINPUT_TYPE=2"));
+      built_options.emplace("-DINPUT_TYPE=2");
+      if (swapped) built_options.emplace("-DSWAPPED");
     }
     if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
...
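The host side only defines SWAPPED for the tensor-vector variant (INPUT_TYPE=2), since equal-size inputs are never swapped; the MakeString wrappers around the two fixed literals are also dropped as redundant. A sketch of the option assembly, assuming built_options is a std::set<std::string> as the emplace calls suggest (BuildOptions is a hypothetical free-function wrapper, not MACE's API):

    #include <iostream>
    #include <set>
    #include <string>

    // Assembles the preprocessor flags handed to the OpenCL kernel compiler.
    std::set<std::string> BuildOptions(int type, bool has_input1,
                                       bool sizes_differ, bool swapped) {
      std::set<std::string> opts;
      opts.emplace("-DELTWISE_TYPE=" + std::to_string(type));
      if (!has_input1) {
        opts.emplace("-DINPUT_TYPE=1");          // single-input variant
      } else if (sizes_differ) {
        opts.emplace("-DINPUT_TYPE=2");          // tensor-vector broadcast variant
        if (swapped) opts.emplace("-DSWAPPED");  // operands arrived reversed
      }
      return opts;
    }

    int main() {
      // SUB (type 1) with a broadcast vector that was the first operand:
      for (const std::string &o : BuildOptions(1, true, true, true))
        std::cout << o << "\n";  // -DELTWISE_TYPE=1, -DINPUT_TYPE=2, -DSWAPPED
      return 0;
    }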
@@ -238,6 +238,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
                                               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
                                               {1, 1, 1, 5}, {1, 2, 3, 4, 5},
                                               {0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
+  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::SUB,
+                                              {1, 1, 1, 5}, {1, 2, 3, 4, 5},
+                                              {1, 2, 1, 5},
+                                              {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+                                              {0, 0, 0, 0, 0,
+                                               -5, -5, -5, -5, -5});
   SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::PROD,
                                               {1, 1, 1, 3}, {1, 2, 3},
                                               {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
@@ -247,6 +253,11 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
                                               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
                                               {1, 1, 1, 5}, {1, 1, 1, 1, 5},
                                               {1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
+  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::DIV,
+                                              {1, 1, 1, 5}, {1, 1, 1, 2, 4},
+                                              {1, 2, 1, 5},
+                                              {1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
+                                              {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
   SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::MIN,
                                               {1, 1, 1, 5}, {1, 2, 3, 4, 5},
                                               {1, 2, 1, 5},
@@ -276,6 +287,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
       {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
       {1, 1, 1, 5}, {1, 2, 3, 4, 5},
       {0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
+  SimpleTensorEltwise<DeviceType::OPENCL, float>(
+      kernels::EltwiseType::SUB,
+      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
+      {1, 2, 1, 5},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+      {0, 0, 0, 0, 0, -5, -5, -5, -5, -5});
   SimpleTensorEltwise<DeviceType::OPENCL, float>(
       kernels::EltwiseType::PROD,
       {1, 1, 1, 3}, {1, 2, 3},
@@ -286,6 +303,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
      {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 1, 1, 1, 5},
      {1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
+  SimpleTensorEltwise<DeviceType::OPENCL, float>(
+      kernels::EltwiseType::DIV,
+      {1, 1, 1, 5}, {1, 1, 1, 2, 4},
+      {1, 2, 1, 5},
+      {1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
+      {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
   SimpleTensorEltwise<DeviceType::OPENCL, float>(
       kernels::EltwiseType::MIN,
       {1, 1, 1, 5}, {1, 2, 3, 4, 5},
@@ -530,6 +553,10 @@ TEST_F(EltwiseOpTest, RandomTensorVecFloat) {
                              {1, 32, 32, 16}, {1, 1, 1, 16});
   RandomTensorEltwise<float>(kernels::EltwiseType::SUB,
                              {5, 32, 32, 16}, {5, 1, 1, 16});
+  RandomTensorEltwise<float>(kernels::EltwiseType::SUB,
+                             {5, 1, 1, 16}, {5, 32, 32, 16});
+  RandomTensorEltwise<float>(kernels::EltwiseType::PROD,
+                             {1, 31, 37, 17}, {1, 1, 1, 17});
   RandomTensorEltwise<float>(kernels::EltwiseType::PROD,
                              {1, 1, 1, 17}, {1, 31, 37, 17});
   RandomTensorEltwise<float>(kernels::EltwiseType::DIV,
@@ -547,8 +574,12 @@ TEST_F(EltwiseOpTest, RandomTensorVecHalf) {
                             {1, 32, 32, 16}, {1, 1, 1, 16});
   RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
                             {3, 32, 32, 16}, {3, 1, 1, 16});
+  RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
+                            {3, 1, 1, 16}, {3, 32, 32, 16});
   RandomTensorEltwise<half>(kernels::EltwiseType::PROD,
                             {1, 1, 1, 17}, {1, 31, 37, 17});
+  RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
+                            {5, 31, 37, 17}, {5, 1, 1, 17});
   RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
                             {5, 1, 1, 17}, {5, 31, 37, 17});
   RandomTensorEltwise<half>(kernels::EltwiseType::MIN,
...
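The new test cases pin down the regression in both operand orders: each existing tensor-op-vector case gains a mirrored vector-op-tensor case. A quick hand-check of the added CPU SUB case (vector {1,2,3,4,5} minus a {1,2,1,5} tensor, which exercises the swapped branch because the vector is the smaller input):

    #include <cassert>
    #include <cstddef>

    int main() {
      // input0: vector, shape {1,1,1,5}; input1: tensor, shape {1,2,1,5}.
      // The functor swaps them internally (the vector is smaller), so the
      // result must still equal vec - tensor, broadcasting vec across rows.
      const float vec[5] = {1, 2, 3, 4, 5};
      const float tensor[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
      const float expected[10] = {0, 0, 0, 0, 0, -5, -5, -5, -5, -5};
      for (std::size_t i = 0; i < 10; ++i)
        assert(vec[i % 5] - tensor[i] == expected[i]);
      return 0;
    }

The added DIV cases follow the same pattern, e.g. {1,1,1,2,4} divided elementwise by the tensor rows {1,1,1,2,2} and {1,1,1,1,1} yields {1,1,1,1,2} and {1,1,1,2,4}.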